aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-08 11:18:31 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-08 11:18:31 -0700
commit1daf117f1d6b5056e27353fa289ef1bbcb619e8d (patch)
tree1cf8f7ee83d46d358d2f54dee322b9edf9665fd8
parentMerge tag 'fuse-update-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse (diff)
parentf2fs: use onstack pages instead of pvec (diff)
downloadlinux-dev-1daf117f1d6b5056e27353fa289ef1bbcb619e8d.tar.xz
linux-dev-1daf117f1d6b5056e27353fa289ef1bbcb619e8d.zip
Merge tag 'f2fs-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this cycle, we mainly fixed some corner cases that manipulate a per-file compression flag inappropriately. And, we found f2fs counted valid blocks in a section incorrectly when zone capacity is set, and thus, fixed it with additional sysfs entry to check it easily. Lastly, this series includes several patches with respect to the new atomic write support such as a couple of bug fixes and re-adding atomic_write_abort support that we removed by mistake in the previous release. Enhancements: - add sysfs entries to understand atomic write operations and zone capacity - introduce memory mode to get a hint for low-memory devices - adjust the waiting time of foreground GC - decompress clusters under softirq to avoid non-deterministic latency - do not skip updating inode when retrying to flush node page - enforce single zone capacity Bug fixes: - set the compression/no-compression flags correctly - revive F2FS_IOC_ABORT_VOLATILE_WRITE - check inline_data during compressed inode conversion - understand zone capacity when calculating valid block count As usual, the series includes several minor clean-ups and sanity checks" * tag 'f2fs-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (29 commits) f2fs: use onstack pages instead of pvec f2fs: intorduce f2fs_all_cluster_page_ready f2fs: clean up f2fs_abort_atomic_write() f2fs: handle decompress only post processing in softirq f2fs: do not allow to decompress files have FI_COMPRESS_RELEASED f2fs: do not set compression bit if kernel doesn't support f2fs: remove device type check for direct IO f2fs: fix null-ptr-deref in f2fs_get_dnode_of_data f2fs: revive F2FS_IOC_ABORT_VOLATILE_WRITE f2fs: fix to do sanity check on segment type in build_sit_entries() f2fs: obsolete unused MAX_DISCARD_BLOCKS f2fs: fix to avoid use f2fs_bug_on() in f2fs_new_node_page() f2fs: fix to remove F2FS_COMPR_FL and tag F2FS_NOCOMP_FL at the same time f2fs: introduce sysfs atomic write statistics f2fs: don't bother wait_ms by foreground gc f2fs: invalidate meta pages only for post_read required inode f2fs: allow compression of files without blocks f2fs: fix to check inline_data during compressed inode conversion f2fs: Delete f2fs_copy_page() and replace with memcpy_page() f2fs: fix to invalidate META_MAPPING before DIO write ...
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs30
-rw-r--r--Documentation/filesystems/f2fs.rst5
-rw-r--r--fs/f2fs/compress.c229
-rw-r--r--fs/f2fs/data.c82
-rw-r--r--fs/f2fs/debug.c2
-rw-r--r--fs/f2fs/f2fs.h102
-rw-r--r--fs/f2fs/file.c79
-rw-r--r--fs/f2fs/gc.c11
-rw-r--r--fs/f2fs/gc.h21
-rw-r--r--fs/f2fs/inode.c3
-rw-r--r--fs/f2fs/node.c14
-rw-r--r--fs/f2fs/segment.c79
-rw-r--r--fs/f2fs/segment.h11
-rw-r--r--fs/f2fs/super.c90
-rw-r--r--fs/f2fs/sysfs.c56
-rw-r--r--include/uapi/linux/f2fs.h2
16 files changed, 562 insertions, 254 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 9b583dd0298b..083ac2d63eef 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -580,3 +580,33 @@ Date: January 2022
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: Controls max # of node block writes to be used for roll forward
recovery. This can limit the roll forward recovery time.
+
+What: /sys/fs/f2fs/<disk>/unusable_blocks_per_sec
+Date: June 2022
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Shows the number of unusable blocks in a section which was defined by
+ the zone capacity reported by underlying zoned device.
+
+What: /sys/fs/f2fs/<disk>/current_atomic_write
+Date: July 2022
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the total current atomic write block count, which is not committed yet.
+ This is a read-only entry.
+
+What: /sys/fs/f2fs/<disk>/peak_atomic_write
+Date: July 2022
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the peak value of total current atomic write block count after boot.
+ If you write "0" here, you can initialize to "0".
+
+What: /sys/fs/f2fs/<disk>/committed_atomic_block
+Date: July 2022
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the accumulated total committed atomic write block count after boot.
+ If you write "0" here, you can initialize to "0".
+
+What: /sys/fs/f2fs/<disk>/revoked_atomic_block
+Date: July 2022
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the accumulated total revoked atomic write block count after boot.
+ If you write "0" here, you can initialize to "0".
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 98dc24f5c6f0..d0c09663dae8 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -336,6 +336,11 @@ discard_unit=%s Control discard unit, the argument can be "block", "segment"
default, it is helpful for large sized SMR or ZNS devices to
reduce memory cost by getting rid of fs metadata supports small
discard.
+memory=%s Control memory mode. This supports "normal" and "low" modes.
+ "low" mode is introduced to support low memory devices.
+ Because of the nature of low memory devices, in this mode, f2fs
+ will try to save memory sometimes by sacrificing performance.
+ "normal" mode is the default mode and same as before.
======================== ============================================================
Debugfs Entries
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 009e6c519e98..70e97075e535 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -729,14 +729,19 @@ out:
return ret;
}
-void f2fs_decompress_cluster(struct decompress_io_ctx *dic)
+static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic,
+ bool pre_alloc);
+static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic,
+ bool bypass_destroy_callback, bool pre_alloc);
+
+void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode);
struct f2fs_inode_info *fi = F2FS_I(dic->inode);
const struct f2fs_compress_ops *cops =
f2fs_cops[fi->i_compress_algorithm];
+ bool bypass_callback = false;
int ret;
- int i;
trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx,
dic->cluster_size, fi->i_compress_algorithm);
@@ -746,41 +751,10 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic)
goto out_end_io;
}
- dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
- if (!dic->tpages) {
- ret = -ENOMEM;
- goto out_end_io;
- }
-
- for (i = 0; i < dic->cluster_size; i++) {
- if (dic->rpages[i]) {
- dic->tpages[i] = dic->rpages[i];
- continue;
- }
-
- dic->tpages[i] = f2fs_compress_alloc_page();
- if (!dic->tpages[i]) {
- ret = -ENOMEM;
- goto out_end_io;
- }
- }
-
- if (cops->init_decompress_ctx) {
- ret = cops->init_decompress_ctx(dic);
- if (ret)
- goto out_end_io;
- }
-
- dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size);
- if (!dic->rbuf) {
- ret = -ENOMEM;
- goto out_destroy_decompress_ctx;
- }
-
- dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages);
- if (!dic->cbuf) {
- ret = -ENOMEM;
- goto out_vunmap_rbuf;
+ ret = f2fs_prepare_decomp_mem(dic, false);
+ if (ret) {
+ bypass_callback = true;
+ goto out_release;
}
dic->clen = le32_to_cpu(dic->cbuf->clen);
@@ -788,7 +762,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic)
if (dic->clen > PAGE_SIZE * dic->nr_cpages - COMPRESS_HEADER_SIZE) {
ret = -EFSCORRUPTED;
- goto out_vunmap_cbuf;
+ goto out_release;
}
ret = cops->decompress_pages(dic);
@@ -809,17 +783,13 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic)
}
}
-out_vunmap_cbuf:
- vm_unmap_ram(dic->cbuf, dic->nr_cpages);
-out_vunmap_rbuf:
- vm_unmap_ram(dic->rbuf, dic->cluster_size);
-out_destroy_decompress_ctx:
- if (cops->destroy_decompress_ctx)
- cops->destroy_decompress_ctx(dic);
+out_release:
+ f2fs_release_decomp_mem(dic, bypass_callback, false);
+
out_end_io:
trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx,
dic->clen, ret);
- f2fs_decompress_end_io(dic, ret);
+ f2fs_decompress_end_io(dic, ret, in_task);
}
/*
@@ -829,7 +799,7 @@ out_end_io:
* (or in the case of a failure, cleans up without actually decompressing).
*/
void f2fs_end_read_compressed_page(struct page *page, bool failed,
- block_t blkaddr)
+ block_t blkaddr, bool in_task)
{
struct decompress_io_ctx *dic =
(struct decompress_io_ctx *)page_private(page);
@@ -839,12 +809,12 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed,
if (failed)
WRITE_ONCE(dic->failed, true);
- else if (blkaddr)
+ else if (blkaddr && in_task)
f2fs_cache_compressed_page(sbi, page,
dic->inode->i_ino, blkaddr);
if (atomic_dec_and_test(&dic->remaining_pages))
- f2fs_decompress_cluster(dic);
+ f2fs_decompress_cluster(dic, in_task);
}
static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index)
@@ -871,19 +841,26 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index)
return is_page_in_cluster(cc, index);
}
-bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec,
- int index, int nr_pages)
+bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
+ int index, int nr_pages, bool uptodate)
{
- unsigned long pgidx;
- int i;
+ unsigned long pgidx = pages[index]->index;
+ int i = uptodate ? 0 : 1;
- if (nr_pages - index < cc->cluster_size)
+ /*
+ * when uptodate set to true, try to check all pages in cluster is
+ * uptodate or not.
+ */
+ if (uptodate && (pgidx % cc->cluster_size))
return false;
- pgidx = pvec->pages[index]->index;
+ if (nr_pages - index < cc->cluster_size)
+ return false;
- for (i = 1; i < cc->cluster_size; i++) {
- if (pvec->pages[index + i]->index != pgidx + i)
+ for (; i < cc->cluster_size; i++) {
+ if (pages[index + i]->index != pgidx + i)
+ return false;
+ if (uptodate && !PageUptodate(pages[index + i]))
return false;
}
@@ -1552,16 +1529,85 @@ destroy_out:
return err;
}
-static void f2fs_free_dic(struct decompress_io_ctx *dic);
+static inline bool allow_memalloc_for_decomp(struct f2fs_sb_info *sbi,
+ bool pre_alloc)
+{
+ return pre_alloc ^ f2fs_low_mem_mode(sbi);
+}
+
+static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic,
+ bool pre_alloc)
+{
+ const struct f2fs_compress_ops *cops =
+ f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm];
+ int i;
+
+ if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc))
+ return 0;
+
+ dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
+ if (!dic->tpages)
+ return -ENOMEM;
+
+ for (i = 0; i < dic->cluster_size; i++) {
+ if (dic->rpages[i]) {
+ dic->tpages[i] = dic->rpages[i];
+ continue;
+ }
+
+ dic->tpages[i] = f2fs_compress_alloc_page();
+ if (!dic->tpages[i])
+ return -ENOMEM;
+ }
+
+ dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size);
+ if (!dic->rbuf)
+ return -ENOMEM;
+
+ dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages);
+ if (!dic->cbuf)
+ return -ENOMEM;
+
+ if (cops->init_decompress_ctx) {
+ int ret = cops->init_decompress_ctx(dic);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic,
+ bool bypass_destroy_callback, bool pre_alloc)
+{
+ const struct f2fs_compress_ops *cops =
+ f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm];
+
+ if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc))
+ return;
+
+ if (!bypass_destroy_callback && cops->destroy_decompress_ctx)
+ cops->destroy_decompress_ctx(dic);
+
+ if (dic->cbuf)
+ vm_unmap_ram(dic->cbuf, dic->nr_cpages);
+
+ if (dic->rbuf)
+ vm_unmap_ram(dic->rbuf, dic->cluster_size);
+}
+
+static void f2fs_free_dic(struct decompress_io_ctx *dic,
+ bool bypass_destroy_callback);
struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
{
struct decompress_io_ctx *dic;
pgoff_t start_idx = start_idx_of_cluster(cc);
- int i;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
+ int i, ret;
- dic = f2fs_kmem_cache_alloc(dic_entry_slab, GFP_F2FS_ZERO,
- false, F2FS_I_SB(cc->inode));
+ dic = f2fs_kmem_cache_alloc(dic_entry_slab, GFP_F2FS_ZERO, false, sbi);
if (!dic)
return ERR_PTR(-ENOMEM);
@@ -1587,32 +1633,43 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
dic->nr_rpages = cc->cluster_size;
dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages);
- if (!dic->cpages)
+ if (!dic->cpages) {
+ ret = -ENOMEM;
goto out_free;
+ }
for (i = 0; i < dic->nr_cpages; i++) {
struct page *page;
page = f2fs_compress_alloc_page();
- if (!page)
+ if (!page) {
+ ret = -ENOMEM;
goto out_free;
+ }
f2fs_set_compressed_page(page, cc->inode,
start_idx + i + 1, dic);
dic->cpages[i] = page;
}
+ ret = f2fs_prepare_decomp_mem(dic, true);
+ if (ret)
+ goto out_free;
+
return dic;
out_free:
- f2fs_free_dic(dic);
- return ERR_PTR(-ENOMEM);
+ f2fs_free_dic(dic, true);
+ return ERR_PTR(ret);
}
-static void f2fs_free_dic(struct decompress_io_ctx *dic)
+static void f2fs_free_dic(struct decompress_io_ctx *dic,
+ bool bypass_destroy_callback)
{
int i;
+ f2fs_release_decomp_mem(dic, bypass_destroy_callback, true);
+
if (dic->tpages) {
for (i = 0; i < dic->cluster_size; i++) {
if (dic->rpages[i])
@@ -1637,17 +1694,33 @@ static void f2fs_free_dic(struct decompress_io_ctx *dic)
kmem_cache_free(dic_entry_slab, dic);
}
-static void f2fs_put_dic(struct decompress_io_ctx *dic)
+static void f2fs_late_free_dic(struct work_struct *work)
{
- if (refcount_dec_and_test(&dic->refcnt))
- f2fs_free_dic(dic);
+ struct decompress_io_ctx *dic =
+ container_of(work, struct decompress_io_ctx, free_work);
+
+ f2fs_free_dic(dic, false);
+}
+
+static void f2fs_put_dic(struct decompress_io_ctx *dic, bool in_task)
+{
+ if (refcount_dec_and_test(&dic->refcnt)) {
+ if (in_task) {
+ f2fs_free_dic(dic, false);
+ } else {
+ INIT_WORK(&dic->free_work, f2fs_late_free_dic);
+ queue_work(F2FS_I_SB(dic->inode)->post_read_wq,
+ &dic->free_work);
+ }
+ }
}
/*
* Update and unlock the cluster's pagecache pages, and release the reference to
* the decompress_io_ctx that was being held for I/O completion.
*/
-static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
+static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed,
+ bool in_task)
{
int i;
@@ -1668,7 +1741,7 @@ static void __f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
unlock_page(rpage);
}
- f2fs_put_dic(dic);
+ f2fs_put_dic(dic, in_task);
}
static void f2fs_verify_cluster(struct work_struct *work)
@@ -1685,14 +1758,15 @@ static void f2fs_verify_cluster(struct work_struct *work)
SetPageError(rpage);
}
- __f2fs_decompress_end_io(dic, false);
+ __f2fs_decompress_end_io(dic, false, true);
}
/*
* This is called when a compressed cluster has been decompressed
* (or failed to be read and/or decompressed).
*/
-void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
+void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed,
+ bool in_task)
{
if (!failed && dic->need_verity) {
/*
@@ -1704,7 +1778,7 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
INIT_WORK(&dic->verity_work, f2fs_verify_cluster);
fsverity_enqueue_verify_work(&dic->verity_work);
} else {
- __f2fs_decompress_end_io(dic, failed);
+ __f2fs_decompress_end_io(dic, failed, in_task);
}
}
@@ -1713,12 +1787,12 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed)
*
* This is called when the page is no longer needed and can be freed.
*/
-void f2fs_put_page_dic(struct page *page)
+void f2fs_put_page_dic(struct page *page, bool in_task)
{
struct decompress_io_ctx *dic =
(struct decompress_io_ctx *)page_private(page);
- f2fs_put_dic(dic);
+ f2fs_put_dic(dic, in_task);
}
/*
@@ -1903,6 +1977,9 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
dev_t dev = sbi->sb->s_bdev->bd_dev;
char slab_name[32];
+ if (!f2fs_sb_has_compression(sbi))
+ return 0;
+
sprintf(slab_name, "f2fs_page_array_entry-%u:%u", MAJOR(dev), MINOR(dev));
sbi->page_array_slab_size = sizeof(struct page *) <<
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index ed503d50f95f..aa3ccddfa037 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -119,7 +119,7 @@ struct bio_post_read_ctx {
block_t fs_blkaddr;
};
-static void f2fs_finish_read_bio(struct bio *bio)
+static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
{
struct bio_vec *bv;
struct bvec_iter_all iter_all;
@@ -133,8 +133,9 @@ static void f2fs_finish_read_bio(struct bio *bio)
if (f2fs_is_compressed_page(page)) {
if (bio->bi_status)
- f2fs_end_read_compressed_page(page, true, 0);
- f2fs_put_page_dic(page);
+ f2fs_end_read_compressed_page(page, true, 0,
+ in_task);
+ f2fs_put_page_dic(page, in_task);
continue;
}
@@ -191,7 +192,7 @@ static void f2fs_verify_bio(struct work_struct *work)
fsverity_verify_bio(bio);
}
- f2fs_finish_read_bio(bio);
+ f2fs_finish_read_bio(bio, true);
}
/*
@@ -203,7 +204,7 @@ static void f2fs_verify_bio(struct work_struct *work)
* can involve reading verity metadata pages from the file, and these verity
* metadata pages may be encrypted and/or compressed.
*/
-static void f2fs_verify_and_finish_bio(struct bio *bio)
+static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
{
struct bio_post_read_ctx *ctx = bio->bi_private;
@@ -211,7 +212,7 @@ static void f2fs_verify_and_finish_bio(struct bio *bio)
INIT_WORK(&ctx->work, f2fs_verify_bio);
fsverity_enqueue_verify_work(&ctx->work);
} else {
- f2fs_finish_read_bio(bio);
+ f2fs_finish_read_bio(bio, in_task);
}
}
@@ -224,7 +225,8 @@ static void f2fs_verify_and_finish_bio(struct bio *bio)
* that the bio includes at least one compressed page. The actual decompression
* is done on a per-cluster basis, not a per-bio basis.
*/
-static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx)
+static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
+ bool in_task)
{
struct bio_vec *bv;
struct bvec_iter_all iter_all;
@@ -237,7 +239,7 @@ static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx)
/* PG_error was set if decryption failed. */
if (f2fs_is_compressed_page(page))
f2fs_end_read_compressed_page(page, PageError(page),
- blkaddr);
+ blkaddr, in_task);
else
all_compressed = false;
@@ -262,15 +264,16 @@ static void f2fs_post_read_work(struct work_struct *work)
fscrypt_decrypt_bio(ctx->bio);
if (ctx->enabled_steps & STEP_DECOMPRESS)
- f2fs_handle_step_decompress(ctx);
+ f2fs_handle_step_decompress(ctx, true);
- f2fs_verify_and_finish_bio(ctx->bio);
+ f2fs_verify_and_finish_bio(ctx->bio, true);
}
static void f2fs_read_end_io(struct bio *bio)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
struct bio_post_read_ctx *ctx;
+ bool intask = in_task();
iostat_update_and_unbind_ctx(bio, 0);
ctx = bio->bi_private;
@@ -281,16 +284,29 @@ static void f2fs_read_end_io(struct bio *bio)
}
if (bio->bi_status) {
- f2fs_finish_read_bio(bio);
+ f2fs_finish_read_bio(bio, intask);
return;
}
- if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
- INIT_WORK(&ctx->work, f2fs_post_read_work);
- queue_work(ctx->sbi->post_read_wq, &ctx->work);
- } else {
- f2fs_verify_and_finish_bio(bio);
+ if (ctx) {
+ unsigned int enabled_steps = ctx->enabled_steps &
+ (STEP_DECRYPT | STEP_DECOMPRESS);
+
+ /*
+ * If we have only decompression step between decompression and
+ * decrypt, we don't need post processing for this.
+ */
+ if (enabled_steps == STEP_DECOMPRESS &&
+ !f2fs_low_mem_mode(sbi)) {
+ f2fs_handle_step_decompress(ctx, intask);
+ } else if (enabled_steps) {
+ INIT_WORK(&ctx->work, f2fs_post_read_work);
+ queue_work(ctx->sbi->post_read_wq, &ctx->work);
+ return;
+ }
}
+
+ f2fs_verify_and_finish_bio(bio, intask);
}
static void f2fs_write_end_io(struct bio *bio)
@@ -1682,8 +1698,6 @@ sync_out:
*/
f2fs_wait_on_block_writeback_range(inode,
map->m_pblk, map->m_len);
- invalidate_mapping_pages(META_MAPPING(sbi),
- map->m_pblk, map->m_pblk);
if (map->m_multidev_dio) {
block_t blk_addr = map->m_pblk;
@@ -2223,7 +2237,7 @@ skip_reading_dnode:
if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
if (atomic_dec_and_test(&dic->remaining_pages))
- f2fs_decompress_cluster(dic);
+ f2fs_decompress_cluster(dic, true);
continue;
}
@@ -2241,7 +2255,7 @@ submit_and_realloc:
page->index, for_write);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
- f2fs_decompress_end_io(dic, ret);
+ f2fs_decompress_end_io(dic, ret, true);
f2fs_put_dnode(&dn);
*bio_ret = NULL;
return ret;
@@ -2731,6 +2745,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
.submitted = false,
.compr_blocks = compr_blocks,
.need_lock = LOCK_RETRY,
+ .post_read = f2fs_post_read_required(inode),
.io_type = io_type,
.io_wbc = wbc,
.bio = bio,
@@ -2902,7 +2917,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
{
int ret = 0;
int done = 0, retry = 0;
- struct pagevec pvec;
+ struct page *pages[F2FS_ONSTACK_PAGES];
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
struct bio *bio = NULL;
sector_t last_block;
@@ -2933,8 +2948,6 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
int submitted = 0;
int i;
- pagevec_init(&pvec);
-
if (get_dirty_pages(mapping->host) <=
SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
set_inode_flag(mapping->host, FI_HOT_DATA);
@@ -2960,13 +2973,13 @@ retry:
tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && !retry && (index <= end)) {
- nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
- tag);
+ nr_pages = find_get_pages_range_tag(mapping, &index, end,
+ tag, F2FS_ONSTACK_PAGES, pages);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ struct page *page = pages[i];
bool need_readd;
readd:
need_readd = false;
@@ -2997,6 +3010,10 @@ readd:
if (!f2fs_cluster_is_empty(&cc))
goto lock_page;
+ if (f2fs_all_cluster_page_ready(&cc,
+ pages, i, nr_pages, true))
+ goto lock_page;
+
ret2 = f2fs_prepare_compress_overwrite(
inode, &pagep,
page->index, &fsdata);
@@ -3007,8 +3024,8 @@ readd:
} else if (ret2 &&
(!f2fs_compress_write_end(inode,
fsdata, page->index, 1) ||
- !f2fs_all_cluster_page_loaded(&cc,
- &pvec, i, nr_pages))) {
+ !f2fs_all_cluster_page_ready(&cc,
+ pages, i, nr_pages, false))) {
retry = 1;
break;
}
@@ -3098,7 +3115,7 @@ next:
if (need_readd)
goto readd;
}
- pagevec_release(&pvec);
+ release_pages(pages, nr_pages);
cond_resched();
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -3408,12 +3425,11 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
struct inode *cow_inode = F2FS_I(inode)->cow_inode;
pgoff_t index = page->index;
int err = 0;
- block_t ori_blk_addr;
+ block_t ori_blk_addr = NULL_ADDR;
/* If pos is beyond the end of file, reserve a new block in COW inode */
if ((pos & PAGE_MASK) >= i_size_read(inode))
- return __reserve_data_block(cow_inode, index, blk_addr,
- node_changed);
+ goto reserve_block;
/* Look for the block in COW inode first */
err = __find_data_block(cow_inode, index, blk_addr);
@@ -3427,10 +3443,12 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
if (err)
return err;
+reserve_block:
/* Finally, we should reserve a new block in COW inode for the update */
err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
if (err)
return err;
+ inc_atomic_write_cnt(inode);
if (ori_blk_addr != NULL_ADDR)
*blk_addr = ori_blk_addr;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index c92625ef16d0..c01471573977 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -39,7 +39,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi)
bimodal = 0;
total_vblocks = 0;
- blks_per_sec = BLKS_PER_SEC(sbi);
+ blks_per_sec = CAP_BLKS_PER_SEC(sbi);
hblks_per_sec = blks_per_sec / 2;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, true);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 85921a1f2db8..3c7cdb70fe2e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -159,6 +159,7 @@ struct f2fs_mount_info {
int fsync_mode; /* fsync policy */
int fs_mode; /* fs mode: LFS or ADAPTIVE */
int bggc_mode; /* bggc mode: off, on or sync */
+ int memory_mode; /* memory mode */
int discard_unit; /*
* discard command's offset/size should
* be aligned to this unit: block,
@@ -229,7 +230,6 @@ enum {
#define CP_PAUSE 0x00000040
#define CP_RESIZE 0x00000080
-#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */
@@ -598,6 +598,8 @@ enum {
#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS
#define RECOVERY_MIN_RA_BLOCKS 1
+#define F2FS_ONSTACK_PAGES 16 /* nr of onstack pages */
+
struct rb_entry {
struct rb_node rb_node; /* rb node located in rb-tree */
union {
@@ -757,6 +759,7 @@ enum {
FI_ENABLE_COMPRESS, /* enable compression in "user" compression mode */
FI_COMPRESS_RELEASED, /* compressed blocks were released */
FI_ALIGNED_WRITE, /* enable aligned write */
+ FI_COW_FILE, /* indicate COW file */
FI_MAX, /* max flag, never be used */
};
@@ -812,6 +815,8 @@ struct f2fs_inode_info {
unsigned char i_compress_level; /* compress level (lz4hc,zstd) */
unsigned short i_compress_flag; /* compress flag */
unsigned int i_cluster_size; /* cluster size */
+
+ unsigned int atomic_write_cnt;
};
static inline void get_extent_info(struct extent_info *ext,
@@ -1198,6 +1203,7 @@ struct f2fs_io_info {
bool retry; /* need to reallocate block address */
int compr_blocks; /* # of compressed block addresses */
bool encrypted; /* indicate file is encrypted */
+ bool post_read; /* require post read */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
struct bio **bio; /* bio for ipu */
@@ -1234,7 +1240,6 @@ struct f2fs_dev_info {
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int nr_blkz; /* Total number of zones */
unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
- block_t *zone_capacity_blocks; /* Array of zone capacity in blks */
#endif
};
@@ -1360,6 +1365,13 @@ enum {
DISCARD_UNIT_SECTION, /* basic discard unit is section */
};
+enum {
+ MEMORY_MODE_NORMAL, /* memory mode for normal devices */
+ MEMORY_MODE_LOW, /* memory mode for low memry devices */
+};
+
+
+
static inline int f2fs_test_bit(unsigned int nr, char *addr);
static inline void f2fs_set_bit(unsigned int nr, char *addr);
static inline void f2fs_clear_bit(unsigned int nr, char *addr);
@@ -1580,6 +1592,7 @@ struct decompress_io_ctx {
void *private; /* payload buffer for specified decompression algorithm */
void *private2; /* extra payload buffer */
struct work_struct verity_work; /* work to verify the decompressed pages */
+ struct work_struct free_work; /* work for late free this structure itself */
};
#define NULL_CLUSTER ((unsigned int)(~0))
@@ -1664,6 +1677,7 @@ struct f2fs_sb_info {
unsigned int meta_ino_num; /* meta inode number*/
unsigned int log_blocks_per_seg; /* log2 blocks per segment */
unsigned int blocks_per_seg; /* blocks per segment */
+ unsigned int unusable_blocks_per_sec; /* unusable blocks per section */
unsigned int segs_per_sec; /* segments per section */
unsigned int secs_per_zone; /* sections per zone */
unsigned int total_sections; /* total section count */
@@ -1804,6 +1818,12 @@ struct f2fs_sb_info {
int max_fragment_chunk; /* max chunk size for block fragmentation mode */
int max_fragment_hole; /* max hole size for block fragmentation mode */
+ /* For atomic write statistics */
+ atomic64_t current_atomic_write;
+ s64 peak_atomic_write;
+ u64 committed_atomic_block;
+ u64 revoked_atomic_block;
+
#ifdef CONFIG_F2FS_FS_COMPRESSION
struct kmem_cache *page_array_slab; /* page array entry */
unsigned int page_array_slab_size; /* default page array slab size */
@@ -2418,6 +2438,28 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
}
+static inline void inc_atomic_write_cnt(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ u64 current_write;
+
+ fi->atomic_write_cnt++;
+ atomic64_inc(&sbi->current_atomic_write);
+ current_write = atomic64_read(&sbi->current_atomic_write);
+ if (current_write > sbi->peak_atomic_write)
+ sbi->peak_atomic_write = current_write;
+}
+
+static inline void release_atomic_write_cnt(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+
+ atomic64_sub(fi->atomic_write_cnt, &sbi->current_atomic_write);
+ fi->atomic_write_cnt = 0;
+}
+
static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
{
return atomic_read(&sbi->nr_pages[count_type]);
@@ -2696,16 +2738,6 @@ static inline struct page *f2fs_pagecache_get_page(
return pagecache_get_page(mapping, index, fgp_flags, gfp_mask);
}
-static inline void f2fs_copy_page(struct page *src, struct page *dst)
-{
- char *src_kaddr = kmap(src);
- char *dst_kaddr = kmap(dst);
-
- memcpy(dst_kaddr, src_kaddr, PAGE_SIZE);
- kunmap(dst);
- kunmap(src);
-}
-
static inline void f2fs_put_page(struct page *page, int unlock)
{
if (!page)
@@ -3208,6 +3240,11 @@ static inline bool f2fs_is_atomic_file(struct inode *inode)
return is_inode_flag_set(inode, FI_ATOMIC_FILE);
}
+static inline bool f2fs_is_cow_file(struct inode *inode)
+{
+ return is_inode_flag_set(inode, FI_COW_FILE);
+}
+
static inline bool f2fs_is_first_block_written(struct inode *inode)
{
return is_inode_flag_set(inode, FI_FIRST_BLOCK_WRITTEN);
@@ -4154,13 +4191,13 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
bool f2fs_is_compress_backend_ready(struct inode *inode);
int f2fs_init_compress_mempool(void);
void f2fs_destroy_compress_mempool(void);
-void f2fs_decompress_cluster(struct decompress_io_ctx *dic);
+void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task);
void f2fs_end_read_compressed_page(struct page *page, bool failed,
- block_t blkaddr);
+ block_t blkaddr, bool in_task);
bool f2fs_cluster_is_empty(struct compress_ctx *cc);
bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
-bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec,
- int index, int nr_pages);
+bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
+ int index, int nr_pages, bool uptodate);
bool f2fs_sanity_check_cluster(struct dnode_of_data *dn);
void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
int f2fs_write_multi_pages(struct compress_ctx *cc,
@@ -4175,8 +4212,9 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
unsigned nr_pages, sector_t *last_block_in_bio,
bool is_readahead, bool for_write);
struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
-void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
-void f2fs_put_page_dic(struct page *page);
+void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed,
+ bool in_task);
+void f2fs_put_page_dic(struct page *page, bool in_task);
unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn);
int f2fs_init_compress_ctx(struct compress_ctx *cc);
void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse);
@@ -4222,13 +4260,14 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
}
static inline int f2fs_init_compress_mempool(void) { return 0; }
static inline void f2fs_destroy_compress_mempool(void) { }
-static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic) { }
+static inline void f2fs_decompress_cluster(struct decompress_io_ctx *dic,
+ bool in_task) { }
static inline void f2fs_end_read_compressed_page(struct page *page,
- bool failed, block_t blkaddr)
+ bool failed, block_t blkaddr, bool in_task)
{
WARN_ON_ONCE(1);
}
-static inline void f2fs_put_page_dic(struct page *page)
+static inline void f2fs_put_page_dic(struct page *page, bool in_task)
{
WARN_ON_ONCE(1);
}
@@ -4254,8 +4293,9 @@ static inline void f2fs_update_extent_tree_range_compressed(struct inode *inode,
unsigned int c_len) { }
#endif
-static inline void set_compress_context(struct inode *inode)
+static inline int set_compress_context(struct inode *inode)
{
+#ifdef CONFIG_F2FS_FS_COMPRESSION
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
F2FS_I(inode)->i_compress_algorithm =
@@ -4278,6 +4318,10 @@ static inline void set_compress_context(struct inode *inode)
stat_inc_compr_inode(inode);
inc_compr_inode_stat(inode);
f2fs_mark_inode_dirty_sync(inode, true);
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
}
static inline bool f2fs_disable_compressed_file(struct inode *inode)
@@ -4394,10 +4438,15 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
}
+static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi)
+{
+ return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW;
+}
+
static inline bool f2fs_may_compress(struct inode *inode)
{
if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) ||
- f2fs_is_atomic_file(inode))
+ f2fs_is_atomic_file(inode) || f2fs_has_inline_data(inode))
return false;
return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode);
}
@@ -4459,12 +4508,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
/* disallow direct IO if any of devices has unaligned blksize */
if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
return true;
- /*
- * for blkzoned device, fallback direct IO to buffered IO, so
- * all IOs can be serialized by log-structured write.
- */
- if (f2fs_sb_has_blkzoned(sbi))
- return true;
+
if (f2fs_lfs_mode(sbi) && (rw == WRITE)) {
if (block_unaligned_IO(inode, iocb, iter))
return true;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index d66e37d80a2d..ce4905a073b3 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1272,7 +1272,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
f2fs_put_page(psrc, 1);
return PTR_ERR(pdst);
}
- f2fs_copy_page(psrc, pdst);
+ memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE);
set_page_dirty(pdst);
f2fs_put_page(pdst, 1);
f2fs_put_page(psrc, 1);
@@ -1675,7 +1675,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
return 0;
if (f2fs_is_pinned_file(inode)) {
- block_t sec_blks = BLKS_PER_SEC(sbi);
+ block_t sec_blks = CAP_BLKS_PER_SEC(sbi);
block_t sec_len = roundup(map.m_len, sec_blks);
map.m_len = sec_blks;
@@ -1816,8 +1816,7 @@ static int f2fs_release_file(struct inode *inode, struct file *filp)
atomic_read(&inode->i_writecount) != 1)
return 0;
- if (f2fs_is_atomic_file(inode))
- f2fs_abort_atomic_write(inode, true);
+ f2fs_abort_atomic_write(inode, true);
return 0;
}
@@ -1831,8 +1830,7 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id)
* until all the writers close its file. Since this should be done
* before dropping file lock, it needs to do in ->flush.
*/
- if (f2fs_is_atomic_file(inode) &&
- F2FS_I(inode)->atomic_write_task == current)
+ if (F2FS_I(inode)->atomic_write_task == current)
f2fs_abort_atomic_write(inode, true);
return 0;
}
@@ -1867,22 +1865,15 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
if (masked_flags & F2FS_COMPR_FL) {
if (!f2fs_disable_compressed_file(inode))
return -EINVAL;
- }
- if (iflags & F2FS_NOCOMP_FL)
- return -EINVAL;
- if (iflags & F2FS_COMPR_FL) {
+ } else {
if (!f2fs_may_compress(inode))
return -EINVAL;
- if (S_ISREG(inode->i_mode) && inode->i_size)
+ if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))
return -EINVAL;
-
- set_compress_context(inode);
+ if (set_compress_context(inode))
+ return -EOPNOTSUPP;
}
}
- if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) {
- if (masked_flags & F2FS_COMPR_FL)
- return -EINVAL;
- }
fi->i_flags = iflags | (fi->i_flags & ~mask);
f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) &&
@@ -2062,13 +2053,14 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
set_inode_flag(inode, FI_ATOMIC_FILE);
- set_inode_flag(fi->cow_inode, FI_ATOMIC_FILE);
+ set_inode_flag(fi->cow_inode, FI_COW_FILE);
clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
f2fs_update_time(sbi, REQ_TIME);
fi->atomic_write_task = current;
stat_update_max_atomic_write(inode);
+ fi->atomic_write_cnt = 0;
out:
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -2109,6 +2101,30 @@ unlock_out:
return ret;
}
+static int f2fs_ioc_abort_atomic_write(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+ struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
+ int ret;
+
+ if (!inode_owner_or_capable(mnt_userns, inode))
+ return -EACCES;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ inode_lock(inode);
+
+ f2fs_abort_atomic_write(inode, true);
+
+ inode_unlock(inode);
+
+ mnt_drop_write_file(filp);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ return ret;
+}
+
static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -2426,7 +2442,7 @@ do_more:
ret = -EAGAIN;
goto out;
}
- range->start += BLKS_PER_SEC(sbi);
+ range->start += CAP_BLKS_PER_SEC(sbi);
if (range->start <= end)
goto do_more;
out:
@@ -2551,7 +2567,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
goto out;
}
- sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi));
+ sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi));
/*
* make sure there are enough free section for LFS allocation, this can
@@ -3897,10 +3913,10 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
for (i = 0; i < page_len; i++, redirty_idx++) {
page = find_lock_page(mapping, redirty_idx);
- if (!page) {
- ret = -ENOMEM;
- break;
- }
+
+ /* It will never fail, when page has pinned above */
+ f2fs_bug_on(F2FS_I_SB(inode), !page);
+
set_page_dirty(page);
f2fs_put_page(page, 1);
f2fs_put_page(page, 0);
@@ -3939,6 +3955,11 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg)
goto out;
}
+ if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret)
goto out;
@@ -4006,6 +4027,11 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg)
goto out;
}
+ if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret)
goto out;
@@ -4054,9 +4080,10 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_start_atomic_write(filp);
case F2FS_IOC_COMMIT_ATOMIC_WRITE:
return f2fs_ioc_commit_atomic_write(filp);
+ case F2FS_IOC_ABORT_ATOMIC_WRITE:
+ return f2fs_ioc_abort_atomic_write(filp);
case F2FS_IOC_START_VOLATILE_WRITE:
case F2FS_IOC_RELEASE_VOLATILE_WRITE:
- case F2FS_IOC_ABORT_VOLATILE_WRITE:
return -EOPNOTSUPP;
case F2FS_IOC_SHUTDOWN:
return f2fs_ioc_shutdown(filp, arg);
@@ -4725,7 +4752,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_COMMIT_ATOMIC_WRITE:
case F2FS_IOC_START_VOLATILE_WRITE:
case F2FS_IOC_RELEASE_VOLATILE_WRITE:
- case F2FS_IOC_ABORT_VOLATILE_WRITE:
+ case F2FS_IOC_ABORT_ATOMIC_WRITE:
case F2FS_IOC_SHUTDOWN:
case FITRIM:
case FS_IOC_SET_ENCRYPTION_POLICY:
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d5fb426e0747..6da21d405ce1 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -150,8 +150,11 @@ do_gc:
gc_control.nr_free_secs = foreground ? 1 : 0;
/* if return value is not zero, no victim was selected */
- if (f2fs_gc(sbi, &gc_control))
- wait_ms = gc_th->no_gc_sleep_time;
+ if (f2fs_gc(sbi, &gc_control)) {
+ /* don't bother wait_ms by foreground gc */
+ if (!foreground)
+ wait_ms = gc_th->no_gc_sleep_time;
+ }
if (foreground)
wake_up_all(&gc_th->fggc_wq);
@@ -487,7 +490,7 @@ static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
unsigned long long age, u, accu;
unsigned long long max_mtime = sit_i->dirty_max_mtime;
unsigned long long min_mtime = sit_i->dirty_min_mtime;
- unsigned int sec_blocks = BLKS_PER_SEC(sbi);
+ unsigned int sec_blocks = CAP_BLKS_PER_SEC(sbi);
unsigned int vblocks;
unsigned int dirty_threshold = max(am->max_candidate_count,
am->candidate_ratio *
@@ -1487,7 +1490,7 @@ next_step:
*/
if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) ||
(!force_migrate && get_valid_blocks(sbi, segno, true) ==
- BLKS_PER_SEC(sbi)))
+ CAP_BLKS_PER_SEC(sbi)))
return submitted;
if (check_valid_map(sbi, segno, off) == 0)
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 3fe145e8e594..19b956c2d697 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -120,15 +120,13 @@ static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
return free_blks - ovp_blks;
}
-static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
+static inline block_t limit_invalid_user_blocks(block_t user_block_count)
{
- return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100;
+ return (long)(user_block_count * LIMIT_INVALID_BLOCK) / 100;
}
-static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
+static inline block_t limit_free_user_blocks(block_t reclaimable_user_blocks)
{
- block_t reclaimable_user_blocks = sbi->user_block_count -
- written_block_count(sbi);
return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
}
@@ -163,15 +161,16 @@ static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th,
static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
{
- block_t invalid_user_blocks = sbi->user_block_count -
- written_block_count(sbi);
+ block_t user_block_count = sbi->user_block_count;
+ block_t invalid_user_blocks = user_block_count -
+ written_block_count(sbi);
/*
* Background GC is triggered with the following conditions.
* 1. There are a number of invalid blocks.
* 2. There is not enough free space.
*/
- if (invalid_user_blocks > limit_invalid_user_blocks(sbi) &&
- free_user_blocks(sbi) < limit_free_user_blocks(sbi))
- return true;
- return false;
+ return (invalid_user_blocks >
+ limit_invalid_user_blocks(user_block_count) &&
+ free_user_blocks(sbi) <
+ limit_free_user_blocks(invalid_user_blocks));
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index fc55f5bd1fcc..6d11c365d7b4 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -744,8 +744,7 @@ void f2fs_evict_inode(struct inode *inode)
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
int err = 0;
- if (f2fs_is_atomic_file(inode))
- f2fs_abort_atomic_write(inode, true);
+ f2fs_abort_atomic_write(inode, true);
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 740c72d088f3..e06a0c478b39 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1292,7 +1292,11 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
dec_valid_node_count(sbi, dn->inode, !ofs);
goto fail;
}
- f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
+ if (unlikely(new_ni.blk_addr != NULL_ADDR)) {
+ err = -EFSCORRUPTED;
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ goto fail;
+ }
#endif
new_ni.nid = dn->nid;
new_ni.ino = dn->inode->i_ino;
@@ -1945,7 +1949,6 @@ next_step:
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
bool submitted = false;
- bool may_dirty = true;
/* give a priority to WB_SYNC threads */
if (atomic_read(&sbi->wb_sync_req[NODE]) &&
@@ -1998,11 +2001,8 @@ continue_unlock:
}
/* flush dirty inode */
- if (IS_INODE(page) && may_dirty) {
- may_dirty = false;
- if (flush_dirty_inode(page))
- goto lock_node;
- }
+ if (IS_INODE(page) && flush_dirty_inode(page))
+ goto lock_node;
write_node:
f2fs_wait_on_page_writeback(page, NODE, true, true);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c7afc588cf26..0de21f82d7bc 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -190,18 +190,20 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- if (f2fs_is_atomic_file(inode)) {
- if (clean)
- truncate_inode_pages_final(inode->i_mapping);
- clear_inode_flag(fi->cow_inode, FI_ATOMIC_FILE);
- iput(fi->cow_inode);
- fi->cow_inode = NULL;
- clear_inode_flag(inode, FI_ATOMIC_FILE);
+ if (!f2fs_is_atomic_file(inode))
+ return;
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- sbi->atomic_files--;
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
- }
+ if (clean)
+ truncate_inode_pages_final(inode->i_mapping);
+ clear_inode_flag(fi->cow_inode, FI_COW_FILE);
+ iput(fi->cow_inode);
+ fi->cow_inode = NULL;
+ release_atomic_write_cnt(inode);
+ clear_inode_flag(inode, FI_ATOMIC_FILE);
+
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ sbi->atomic_files--;
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
}
static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
@@ -335,6 +337,11 @@ next:
}
out:
+ if (ret)
+ sbi->revoked_atomic_block += fi->atomic_write_cnt;
+ else
+ sbi->committed_atomic_block += fi->atomic_write_cnt;
+
__complete_revoke_list(inode, &revoke_list, ret ? true : false);
return ret;
@@ -728,7 +735,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
get_valid_blocks(sbi, segno, true);
f2fs_bug_on(sbi, unlikely(!valid_blocks ||
- valid_blocks == BLKS_PER_SEC(sbi)));
+ valid_blocks == CAP_BLKS_PER_SEC(sbi)));
if (!IS_CURSEC(sbi, secno))
set_bit(secno, dirty_i->dirty_secmap);
@@ -764,7 +771,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
if (!valid_blocks ||
- valid_blocks == BLKS_PER_SEC(sbi)) {
+ valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
clear_bit(secno, dirty_i->dirty_secmap);
return;
}
@@ -3166,7 +3173,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
return CURSEG_COLD_DATA;
if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) ||
- f2fs_is_atomic_file(inode))
+ f2fs_is_cow_file(inode))
return CURSEG_HOT_DATA;
return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
} else {
@@ -3433,7 +3440,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
goto drop_bio;
}
- invalidate_mapping_pages(META_MAPPING(sbi),
+ if (fio->post_read)
+ invalidate_mapping_pages(META_MAPPING(sbi),
fio->new_blkaddr, fio->new_blkaddr);
stat_inc_inplace_blocks(fio->sbi);
@@ -3616,10 +3624,16 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
block_t len)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
block_t i;
+ if (!f2fs_post_read_required(inode))
+ return;
+
for (i = 0; i < len; i++)
f2fs_wait_on_block_writeback(inode, blkaddr + i);
+
+ invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
}
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
@@ -4362,6 +4376,12 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
return err;
seg_info_from_raw_sit(se, &sit);
+ if (se->type >= NR_PERSISTENT_LOG) {
+ f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
+ se->type, start);
+ return -EFSCORRUPTED;
+ }
+
sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
if (f2fs_block_unit_discard(sbi)) {
@@ -4410,6 +4430,13 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
break;
seg_info_from_raw_sit(se, &sit);
+ if (se->type >= NR_PERSISTENT_LOG) {
+ f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
+ se->type, start);
+ err = -EFSCORRUPTED;
+ break;
+ }
+
sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
if (f2fs_block_unit_discard(sbi)) {
@@ -4483,7 +4510,6 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int segno = 0, offset = 0, secno;
block_t valid_blocks, usable_blks_in_seg;
- block_t blks_per_sec = BLKS_PER_SEC(sbi);
while (1) {
/* find dirty segment based on free segmap */
@@ -4512,7 +4538,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
valid_blocks = get_valid_blocks(sbi, segno, true);
secno = GET_SEC_FROM_SEG(sbi, segno);
- if (!valid_blocks || valid_blocks == blks_per_sec)
+ if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
continue;
if (IS_CURSEC(sbi, secno))
continue;
@@ -4895,7 +4921,7 @@ static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno,
static inline unsigned int f2fs_usable_zone_segs_in_sec(
struct f2fs_sb_info *sbi, unsigned int segno)
{
- unsigned int dev_idx, zone_idx, unusable_segs_in_sec;
+ unsigned int dev_idx, zone_idx;
dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
@@ -4904,18 +4930,12 @@ static inline unsigned int f2fs_usable_zone_segs_in_sec(
if (is_conv_zone(sbi, zone_idx, dev_idx))
return sbi->segs_per_sec;
- /*
- * If the zone_capacity_blocks array is NULL, then zone capacity
- * is equal to the zone size for all zones
- */
- if (!FDEV(dev_idx).zone_capacity_blocks)
+ if (!sbi->unusable_blocks_per_sec)
return sbi->segs_per_sec;
/* Get the segment count beyond zone capacity block */
- unusable_segs_in_sec = (sbi->blocks_per_blkz -
- FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >>
- sbi->log_blocks_per_seg;
- return sbi->segs_per_sec - unusable_segs_in_sec;
+ return sbi->segs_per_sec - (sbi->unusable_blocks_per_sec >>
+ sbi->log_blocks_per_seg);
}
/*
@@ -4944,12 +4964,11 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg(
if (is_conv_zone(sbi, zone_idx, dev_idx))
return sbi->blocks_per_seg;
- if (!FDEV(dev_idx).zone_capacity_blocks)
+ if (!sbi->unusable_blocks_per_sec)
return sbi->blocks_per_seg;
sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
- sec_cap_blkaddr = sec_start_blkaddr +
- FDEV(dev_idx).zone_capacity_blocks[zone_idx];
+ sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
/*
* If segment starts before zone capacity and spans beyond
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 3f277dfcb131..d1d63766f2c7 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -101,6 +101,9 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
+#define CAP_BLKS_PER_SEC(sbi) \
+ ((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \
+ (sbi)->unusable_blocks_per_sec)
#define GET_SEC_FROM_SEG(sbi, segno) \
(((segno) == -1) ? -1: (segno) / (sbi)->segs_per_sec)
#define GET_SEG_FROM_SEC(sbi, secno) \
@@ -609,10 +612,10 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
get_pages(sbi, F2FS_DIRTY_DENTS) +
get_pages(sbi, F2FS_DIRTY_IMETA);
unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
- unsigned int node_secs = total_node_blocks / BLKS_PER_SEC(sbi);
- unsigned int dent_secs = total_dent_blocks / BLKS_PER_SEC(sbi);
- unsigned int node_blocks = total_node_blocks % BLKS_PER_SEC(sbi);
- unsigned int dent_blocks = total_dent_blocks % BLKS_PER_SEC(sbi);
+ unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi);
+ unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi);
+ unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi);
+ unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
unsigned int free, need_lower, need_upper;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index bce02306f7a0..2451623c05a7 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/fs_context.h>
#include <linux/sched/mm.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
@@ -159,6 +160,7 @@ enum {
Opt_gc_merge,
Opt_nogc_merge,
Opt_discard_unit,
+ Opt_memory_mode,
Opt_err,
};
@@ -235,6 +237,7 @@ static match_table_t f2fs_tokens = {
{Opt_gc_merge, "gc_merge"},
{Opt_nogc_merge, "nogc_merge"},
{Opt_discard_unit, "discard_unit=%s"},
+ {Opt_memory_mode, "memory=%s"},
{Opt_err, NULL},
};
@@ -492,9 +495,19 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb,
bool is_remount)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
-#ifdef CONFIG_FS_ENCRYPTION
+ struct fs_parameter param = {
+ .type = fs_value_is_string,
+ .string = arg->from ? arg->from : "",
+ };
+ struct fscrypt_dummy_policy *policy =
+ &F2FS_OPTION(sbi).dummy_enc_policy;
int err;
+ if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
+ f2fs_warn(sbi, "test_dummy_encryption option not supported");
+ return -EINVAL;
+ }
+
if (!f2fs_sb_has_encrypt(sbi)) {
f2fs_err(sbi, "Encrypt feature is off");
return -EINVAL;
@@ -506,12 +519,12 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb,
* needed to allow it to be set or changed during remount. We do allow
* it to be specified during remount, but only if there is no change.
*/
- if (is_remount && !F2FS_OPTION(sbi).dummy_enc_policy.policy) {
+ if (is_remount && !fscrypt_is_dummy_policy_set(policy)) {
f2fs_warn(sbi, "Can't set test_dummy_encryption on remount");
return -EINVAL;
}
- err = fscrypt_set_test_dummy_encryption(
- sb, arg->from, &F2FS_OPTION(sbi).dummy_enc_policy);
+
+ err = fscrypt_parse_test_dummy_encryption(&param, policy);
if (err) {
if (err == -EEXIST)
f2fs_warn(sbi,
@@ -524,12 +537,14 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb,
opt, err);
return -EINVAL;
}
+ err = fscrypt_add_test_dummy_key(sb, policy);
+ if (err) {
+ f2fs_warn(sbi, "Error adding test dummy encryption key [%d]",
+ err);
+ return err;
+ }
f2fs_warn(sbi, "Test dummy encryption mode enabled");
return 0;
-#else
- f2fs_warn(sbi, "test_dummy_encryption option not supported");
- return -EINVAL;
-#endif
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -1222,6 +1237,22 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
}
kfree(name);
break;
+ case Opt_memory_mode:
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+ if (!strcmp(name, "normal")) {
+ F2FS_OPTION(sbi).memory_mode =
+ MEMORY_MODE_NORMAL;
+ } else if (!strcmp(name, "low")) {
+ F2FS_OPTION(sbi).memory_mode =
+ MEMORY_MODE_LOW;
+ } else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
@@ -1380,8 +1411,7 @@ static int f2fs_drop_inode(struct inode *inode)
atomic_inc(&inode->i_count);
spin_unlock(&inode->i_lock);
- if (f2fs_is_atomic_file(inode))
- f2fs_abort_atomic_write(inode, true);
+ f2fs_abort_atomic_write(inode, true);
/* should remain fi->extent_tree for writepage */
f2fs_destroy_extent_node(inode);
@@ -1491,7 +1521,6 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
- kfree(FDEV(i).zone_capacity_blocks);
#endif
}
kvfree(sbi->devs);
@@ -1993,6 +2022,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
seq_printf(seq, ",discard_unit=%s", "section");
+ if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_NORMAL)
+ seq_printf(seq, ",memory=%s", "normal");
+ else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
+ seq_printf(seq, ",memory=%s", "low");
+
return 0;
}
@@ -2014,6 +2048,7 @@ static void default_options(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).compress_ext_cnt = 0;
F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS;
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
+ F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
sbi->sb->s_flags &= ~SB_INLINECRYPT;
@@ -3579,6 +3614,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
spin_lock_init(&sbi->gc_urgent_high_lock);
+ atomic64_set(&sbi->current_atomic_write, 0);
sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -3636,24 +3672,29 @@ err_valid_block:
#ifdef CONFIG_BLK_DEV_ZONED
struct f2fs_report_zones_args {
+ struct f2fs_sb_info *sbi;
struct f2fs_dev_info *dev;
- bool zone_cap_mismatch;
};
static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx,
void *data)
{
struct f2fs_report_zones_args *rz_args = data;
+ block_t unusable_blocks = (zone->len - zone->capacity) >>
+ F2FS_LOG_SECTORS_PER_BLOCK;
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return 0;
set_bit(idx, rz_args->dev->blkz_seq);
- rz_args->dev->zone_capacity_blocks[idx] = zone->capacity >>
- F2FS_LOG_SECTORS_PER_BLOCK;
- if (zone->len != zone->capacity && !rz_args->zone_cap_mismatch)
- rz_args->zone_cap_mismatch = true;
-
+ if (!rz_args->sbi->unusable_blocks_per_sec) {
+ rz_args->sbi->unusable_blocks_per_sec = unusable_blocks;
+ return 0;
+ }
+ if (rz_args->sbi->unusable_blocks_per_sec != unusable_blocks) {
+ f2fs_err(rz_args->sbi, "F2FS supports single zone capacity\n");
+ return -EINVAL;
+ }
return 0;
}
@@ -3694,26 +3735,13 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
if (!FDEV(devi).blkz_seq)
return -ENOMEM;
- /* Get block zones type and zone-capacity */
- FDEV(devi).zone_capacity_blocks = f2fs_kzalloc(sbi,
- FDEV(devi).nr_blkz * sizeof(block_t),
- GFP_KERNEL);
- if (!FDEV(devi).zone_capacity_blocks)
- return -ENOMEM;
-
+ rep_zone_arg.sbi = sbi;
rep_zone_arg.dev = &FDEV(devi);
- rep_zone_arg.zone_cap_mismatch = false;
ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb,
&rep_zone_arg);
if (ret < 0)
return ret;
-
- if (!rep_zone_arg.zone_cap_mismatch) {
- kfree(FDEV(devi).zone_capacity_blocks);
- FDEV(devi).zone_capacity_blocks = NULL;
- }
-
return 0;
}
#endif
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 4c50aedd5144..eba5fb1629d7 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -339,6 +339,21 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
sbi->gc_reclaimed_segs[sbi->gc_segment_mode]);
}
+ if (!strcmp(a->attr.name, "current_atomic_write")) {
+ s64 current_write = atomic64_read(&sbi->current_atomic_write);
+
+ return sysfs_emit(buf, "%lld\n", current_write);
+ }
+
+ if (!strcmp(a->attr.name, "peak_atomic_write"))
+ return sysfs_emit(buf, "%lld\n", sbi->peak_atomic_write);
+
+ if (!strcmp(a->attr.name, "committed_atomic_block"))
+ return sysfs_emit(buf, "%llu\n", sbi->committed_atomic_block);
+
+ if (!strcmp(a->attr.name, "revoked_atomic_block"))
+ return sysfs_emit(buf, "%llu\n", sbi->revoked_atomic_block);
+
ui = (unsigned int *)(ptr + a->offset);
return sprintf(buf, "%u\n", *ui);
@@ -608,6 +623,27 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "peak_atomic_write")) {
+ if (t != 0)
+ return -EINVAL;
+ sbi->peak_atomic_write = 0;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "committed_atomic_block")) {
+ if (t != 0)
+ return -EINVAL;
+ sbi->committed_atomic_block = 0;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "revoked_atomic_block")) {
+ if (t != 0)
+ return -EINVAL;
+ sbi->revoked_atomic_block = 0;
+ return count;
+ }
+
*ui = (unsigned int)t;
return count;
@@ -713,6 +749,11 @@ static struct f2fs_attr f2fs_attr_##_name = { \
.offset = _offset \
}
+#define F2FS_RO_ATTR(struct_type, struct_name, name, elname) \
+ F2FS_ATTR_OFFSET(struct_type, name, 0444, \
+ f2fs_sbi_show, NULL, \
+ offsetof(struct struct_name, elname))
+
#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \
F2FS_ATTR_OFFSET(struct_type, name, 0644, \
f2fs_sbi_show, f2fs_sbi_store, \
@@ -811,6 +852,8 @@ F2FS_FEATURE_RO_ATTR(encrypted_casefold);
#endif /* CONFIG_FS_ENCRYPTION */
#ifdef CONFIG_BLK_DEV_ZONED
F2FS_FEATURE_RO_ATTR(block_zoned);
+F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, unusable_blocks_per_sec,
+ unusable_blocks_per_sec);
#endif
F2FS_FEATURE_RO_ATTR(atomic_write);
F2FS_FEATURE_RO_ATTR(extra_attr);
@@ -848,6 +891,12 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole);
+/* For atomic write */
+F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, current_atomic_write, current_atomic_write);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, peak_atomic_write, peak_atomic_write);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_block);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block);
+
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_urgent_sleep_time),
@@ -919,6 +968,9 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(moved_blocks_background),
ATTR_LIST(avg_vblocks),
#endif
+#ifdef CONFIG_BLK_DEV_ZONED
+ ATTR_LIST(unusable_blocks_per_sec),
+#endif
#ifdef CONFIG_F2FS_FS_COMPRESSION
ATTR_LIST(compr_written_block),
ATTR_LIST(compr_saved_block),
@@ -934,6 +986,10 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_reclaimed_segments),
ATTR_LIST(max_fragment_chunk),
ATTR_LIST(max_fragment_hole),
+ ATTR_LIST(current_atomic_write),
+ ATTR_LIST(peak_atomic_write),
+ ATTR_LIST(committed_atomic_block),
+ ATTR_LIST(revoked_atomic_block),
NULL,
};
ATTRIBUTE_GROUPS(f2fs);
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index 352a822d4370..3121d127d5aa 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -13,7 +13,7 @@
#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
-#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
+#define F2FS_IOC_ABORT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32)
#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
#define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \