From 2bc4bea33848e4c5f441fcec4a94497082c1cc9f Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 30 Mar 2020 03:30:59 +0000 Subject: f2fs: add tracepoint for f2fs iostat Added a tracepoint to see iostat of f2fs. Default period of that is 3 second. This tracepoint can be used to be monitoring I/O statistics periodically. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 52 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index d97adfc327f0..e78c8696e2ad 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1812,6 +1812,58 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end, TP_ARGS(inode, cluster_idx, compressed_size, ret) ); +TRACE_EVENT(f2fs_iostat, + + TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat), + + TP_ARGS(sbi, iostat), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long long, app_dio) + __field(unsigned long long, app_bio) + __field(unsigned long long, app_wio) + __field(unsigned long long, app_mio) + __field(unsigned long long, fs_dio) + __field(unsigned long long, fs_nio) + __field(unsigned long long, fs_mio) + __field(unsigned long long, fs_gc_dio) + __field(unsigned long long, fs_gc_nio) + __field(unsigned long long, fs_cp_dio) + __field(unsigned long long, fs_cp_nio) + __field(unsigned long long, fs_cp_mio) + __field(unsigned long long, fs_discard) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + __entry->app_dio = iostat[APP_DIRECT_IO]; + __entry->app_bio = iostat[APP_BUFFERED_IO]; + __entry->app_wio = iostat[APP_WRITE_IO]; + __entry->app_mio = iostat[APP_MAPPED_IO]; + __entry->fs_dio = iostat[FS_DATA_IO]; + __entry->fs_nio = iostat[FS_NODE_IO]; + __entry->fs_mio = iostat[FS_META_IO]; + __entry->fs_gc_dio = iostat[FS_GC_DATA_IO]; + __entry->fs_gc_nio = iostat[FS_GC_NODE_IO]; + __entry->fs_cp_dio = iostat[FS_CP_DATA_IO]; + __entry->fs_cp_nio = iostat[FS_CP_NODE_IO]; + __entry->fs_cp_mio = iostat[FS_CP_META_IO]; + __entry->fs_discard = iostat[FS_DISCARD]; + ), + + TP_printk("dev = (%d,%d), " + "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu], " + "fs [data=%llu, node=%llu, meta=%llu, discard=%llu], " + "gc [data=%llu, node=%llu], " + "cp [data=%llu, node=%llu, meta=%llu]", + show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, + __entry->app_bio, __entry->app_mio, __entry->fs_dio, + __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, + __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, + __entry->fs_cp_nio, __entry->fs_cp_mio) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- cgit v1.2.3-59-g8ed1b From 8b83ac81f4283ae3bd05c9a7e15dca721014dd03 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 16 Apr 2020 18:16:56 +0800 Subject: f2fs: support read iostat Adds to support accounting read IOs from userspace/kernel. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++++ fs/f2fs/data.c | 6 ++++++ fs/f2fs/f2fs.h | 37 +++++++++++++++++++++++--------- fs/f2fs/file.c | 12 ++++++++++- fs/f2fs/gc.c | 6 ++++++ fs/f2fs/node.c | 8 ++++++- fs/f2fs/sysfs.c | 52 ++++++++++++++++++++++++++++++--------------- include/trace/events/f2fs.h | 23 ++++++++++++++++++-- 8 files changed, 118 insertions(+), 31 deletions(-) (limited to 'include/trace') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6be357c8e002..5ba649e17c72 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -86,6 +86,8 @@ repeat: return ERR_PTR(err); } + f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); + lock_page(page); if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); @@ -266,6 +268,9 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, fio.page = page; err = f2fs_submit_page_bio(&fio); f2fs_put_page(page, err ? 1 : 0); + + if (!err) + f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE); } out: blk_finish_plug(&plug); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index accd28728642..199877cb57fe 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1033,6 +1033,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, } ClearPageError(page); inc_page_count(sbi, F2FS_RD_DATA); + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); __submit_bio(sbi, bio, DATA); return 0; } @@ -2038,6 +2039,7 @@ submit_and_realloc: goto submit_and_realloc; inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); + f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = block_nr; goto out; @@ -2173,6 +2175,7 @@ submit_and_realloc: goto submit_and_realloc; inc_page_count(sbi, F2FS_RD_DATA); + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = blkaddr; } @@ -3526,6 +3529,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else if (err < 0) { f2fs_write_failed(mapping, offset + count); } + } else { + if (err > 0) + f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err); } out: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6cedbfb2067c..3b9603266a2a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1088,8 +1088,9 @@ enum cp_reason_type { }; enum iostat_type { - APP_DIRECT_IO, /* app direct IOs */ - APP_BUFFERED_IO, /* app buffered IOs */ + /* WRITE IO */ + APP_DIRECT_IO, /* app direct write IOs */ + APP_BUFFERED_IO, /* app buffered write IOs */ APP_WRITE_IO, /* app write IOs */ APP_MAPPED_IO, /* app mapped IOs */ FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */ @@ -1100,6 +1101,17 @@ enum iostat_type { FS_CP_DATA_IO, /* data IOs from checkpoint */ FS_CP_NODE_IO, /* node IOs from checkpoint */ FS_CP_META_IO, /* meta IOs from checkpoint */ + + /* READ IO */ + APP_DIRECT_READ_IO, /* app direct read IOs */ + APP_BUFFERED_READ_IO, /* app buffered read IOs */ + APP_READ_IO, /* app read IOs */ + APP_MAPPED_READ_IO, /* app mapped read IOs */ + FS_DATA_READ_IO, /* data read IOs */ + FS_NODE_READ_IO, /* node read IOs */ + FS_META_READ_IO, /* meta read IOs */ + + /* other */ FS_DISCARD, /* discard */ NR_IO_TYPE, }; @@ -1504,8 +1516,8 @@ struct f2fs_sb_info { /* For app/fs IO statistics */ spinlock_t iostat_lock; - unsigned long long write_iostat[NR_IO_TYPE]; - unsigned long long prev_write_iostat[NR_IO_TYPE]; + unsigned long long rw_iostat[NR_IO_TYPE]; + unsigned long long prev_rw_iostat[NR_IO_TYPE]; bool iostat_enable; unsigned long iostat_next_period; unsigned int iostat_period_ms; @@ -3013,8 +3025,8 @@ static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) spin_lock(&sbi->iostat_lock); for (i = 0; i < NR_IO_TYPE; i++) { - sbi->write_iostat[i] = 0; - sbi->prev_write_iostat[i] = 0; + sbi->rw_iostat[i] = 0; + sbi->prev_rw_iostat[i] = 0; } spin_unlock(&sbi->iostat_lock); } @@ -3027,12 +3039,17 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, if (!sbi->iostat_enable) return; spin_lock(&sbi->iostat_lock); - sbi->write_iostat[type] += io_bytes; + sbi->rw_iostat[type] += io_bytes; if (type == APP_WRITE_IO || type == APP_DIRECT_IO) - sbi->write_iostat[APP_BUFFERED_IO] = - sbi->write_iostat[APP_WRITE_IO] - - sbi->write_iostat[APP_DIRECT_IO]; + sbi->rw_iostat[APP_BUFFERED_IO] = + sbi->rw_iostat[APP_WRITE_IO] - + sbi->rw_iostat[APP_DIRECT_IO]; + + if (type == APP_READ_IO || type == APP_DIRECT_READ_IO) + sbi->rw_iostat[APP_BUFFERED_READ_IO] = + sbi->rw_iostat[APP_READ_IO] - + sbi->rw_iostat[APP_DIRECT_READ_IO]; spin_unlock(&sbi->iostat_lock); f2fs_record_iostat(sbi); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6ab8f621a3c5..a0a4413d6083 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -40,6 +40,10 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) ret = filemap_fault(vmf); up_read(&F2FS_I(inode)->i_mmap_sem); + if (!ret) + f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO, + F2FS_BLKSIZE); + trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret); return ret; @@ -3510,11 +3514,17 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + int ret; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; - return generic_file_read_iter(iocb, iter); + ret = generic_file_read_iter(iocb, iter); + + if (ret > 0) + f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret); + + return ret; } static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 26248c8936db..28a8c79c8bdc 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -737,6 +737,9 @@ got_it: goto put_encrypted_page; f2fs_put_page(fio.encrypted_page, 0); f2fs_put_page(page, 1); + + f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + return 0; put_encrypted_page: f2fs_put_page(fio.encrypted_page, 1); @@ -840,6 +843,9 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_put_page(mpage, 1); goto up_out; } + + f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + lock_page(mpage); if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || !PageUptodate(mpage))) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ecbd6bd14a49..4da0d8713df5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1300,7 +1300,13 @@ static int read_node_page(struct page *page, int op_flags) } fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr; - return f2fs_submit_page_bio(&fio); + + err = f2fs_submit_page_bio(&fio); + + if (!err) + f2fs_update_iostat(sbi, FS_NODE_READ_IO, F2FS_BLKSIZE); + + return err; } /* diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d05cb68c2637..eaf8088548f0 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -781,9 +781,9 @@ void f2fs_record_iostat(struct f2fs_sb_info *sbi) msecs_to_jiffies(sbi->iostat_period_ms); for (i = 0; i < NR_IO_TYPE; i++) { - iostat_diff[i] = sbi->write_iostat[i] - - sbi->prev_write_iostat[i]; - sbi->prev_write_iostat[i] = sbi->write_iostat[i]; + iostat_diff[i] = sbi->rw_iostat[i] - + sbi->prev_rw_iostat[i]; + sbi->prev_rw_iostat[i] = sbi->rw_iostat[i]; } spin_unlock(&sbi->iostat_lock); @@ -802,33 +802,51 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, seq_printf(seq, "time: %-16llu\n", now); - /* print app IOs */ + /* print app write IOs */ seq_printf(seq, "app buffered: %-16llu\n", - sbi->write_iostat[APP_BUFFERED_IO]); + sbi->rw_iostat[APP_BUFFERED_IO]); seq_printf(seq, "app direct: %-16llu\n", - sbi->write_iostat[APP_DIRECT_IO]); + sbi->rw_iostat[APP_DIRECT_IO]); seq_printf(seq, "app mapped: %-16llu\n", - sbi->write_iostat[APP_MAPPED_IO]); + sbi->rw_iostat[APP_MAPPED_IO]); - /* print fs IOs */ + /* print fs write IOs */ seq_printf(seq, "fs data: %-16llu\n", - sbi->write_iostat[FS_DATA_IO]); + sbi->rw_iostat[FS_DATA_IO]); seq_printf(seq, "fs node: %-16llu\n", - sbi->write_iostat[FS_NODE_IO]); + sbi->rw_iostat[FS_NODE_IO]); seq_printf(seq, "fs meta: %-16llu\n", - sbi->write_iostat[FS_META_IO]); + sbi->rw_iostat[FS_META_IO]); seq_printf(seq, "fs gc data: %-16llu\n", - sbi->write_iostat[FS_GC_DATA_IO]); + sbi->rw_iostat[FS_GC_DATA_IO]); seq_printf(seq, "fs gc node: %-16llu\n", - sbi->write_iostat[FS_GC_NODE_IO]); + sbi->rw_iostat[FS_GC_NODE_IO]); seq_printf(seq, "fs cp data: %-16llu\n", - sbi->write_iostat[FS_CP_DATA_IO]); + sbi->rw_iostat[FS_CP_DATA_IO]); seq_printf(seq, "fs cp node: %-16llu\n", - sbi->write_iostat[FS_CP_NODE_IO]); + sbi->rw_iostat[FS_CP_NODE_IO]); seq_printf(seq, "fs cp meta: %-16llu\n", - sbi->write_iostat[FS_CP_META_IO]); + sbi->rw_iostat[FS_CP_META_IO]); + + /* print app read IOs */ + seq_printf(seq, "app buffered: %-16llu\n", + sbi->rw_iostat[APP_BUFFERED_READ_IO]); + seq_printf(seq, "app direct: %-16llu\n", + sbi->rw_iostat[APP_DIRECT_READ_IO]); + seq_printf(seq, "app mapped: %-16llu\n", + sbi->rw_iostat[APP_MAPPED_READ_IO]); + + /* print fs read IOs */ + seq_printf(seq, "fs data: %-16llu\n", + sbi->rw_iostat[FS_DATA_READ_IO]); + seq_printf(seq, "fs node: %-16llu\n", + sbi->rw_iostat[FS_NODE_READ_IO]); + seq_printf(seq, "fs meta: %-16llu\n", + sbi->rw_iostat[FS_META_READ_IO]); + + /* print other IOs */ seq_printf(seq, "fs discard: %-16llu\n", - sbi->write_iostat[FS_DISCARD]); + sbi->rw_iostat[FS_DISCARD]); return 0; } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index e78c8696e2ad..417a486f5c8a 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1832,6 +1832,13 @@ TRACE_EVENT(f2fs_iostat, __field(unsigned long long, fs_cp_dio) __field(unsigned long long, fs_cp_nio) __field(unsigned long long, fs_cp_mio) + __field(unsigned long long, app_drio) + __field(unsigned long long, app_brio) + __field(unsigned long long, app_rio) + __field(unsigned long long, app_mrio) + __field(unsigned long long, fs_drio) + __field(unsigned long long, fs_nrio) + __field(unsigned long long, fs_mrio) __field(unsigned long long, fs_discard) ), @@ -1849,6 +1856,13 @@ TRACE_EVENT(f2fs_iostat, __entry->fs_cp_dio = iostat[FS_CP_DATA_IO]; __entry->fs_cp_nio = iostat[FS_CP_NODE_IO]; __entry->fs_cp_mio = iostat[FS_CP_META_IO]; + __entry->app_drio = iostat[APP_DIRECT_READ_IO]; + __entry->app_brio = iostat[APP_BUFFERED_READ_IO]; + __entry->app_rio = iostat[APP_READ_IO]; + __entry->app_mrio = iostat[APP_MAPPED_READ_IO]; + __entry->fs_drio = iostat[FS_DATA_READ_IO]; + __entry->fs_nrio = iostat[FS_NODE_READ_IO]; + __entry->fs_mrio = iostat[FS_META_READ_IO]; __entry->fs_discard = iostat[FS_DISCARD]; ), @@ -1856,12 +1870,17 @@ TRACE_EVENT(f2fs_iostat, "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu], " "fs [data=%llu, node=%llu, meta=%llu, discard=%llu], " "gc [data=%llu, node=%llu], " - "cp [data=%llu, node=%llu, meta=%llu]", + "cp [data=%llu, node=%llu, meta=%llu], " + "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " + "fs [data=%llu, node=%llu, meta=%llu]", show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, __entry->app_bio, __entry->app_mio, __entry->fs_dio, __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, - __entry->fs_cp_nio, __entry->fs_cp_mio) + __entry->fs_cp_nio, __entry->fs_cp_mio, + __entry->app_rio, __entry->app_drio, __entry->app_brio, + __entry->app_mrio, __entry->fs_drio, __entry->fs_nrio, + __entry->fs_mrio) ); #endif /* _TRACE_F2FS_H */ -- cgit v1.2.3-59-g8ed1b From 6d92b201035dfe77426f8814fd5259db385a18b3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Apr 2020 19:56:32 +0800 Subject: f2fs: compress: support lzo-rle compress algorithm LZO-RLE extension (run length encoding) was introduced to improve performance of LZO algorithm in scenario of data contains many zeros, zram has changed to use this extended algorithm by default, this patch adds to support this algorithm extension, to enable this extension, it needs to enable F2FS_FS_LZO and F2FS_FS_LZORLE config, and specifies "compress_algorithm=lzo-rle" mountoption. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 2 +- fs/f2fs/Kconfig | 10 ++++++++++ fs/f2fs/compress.c | 30 ++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 6 ++++++ include/trace/events/f2fs.h | 3 ++- 6 files changed, 50 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 87d794bc75a4..96e3f89d7aba 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -244,7 +244,7 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enabl would be unusable can be viewed at /sys/fs/f2fs//unusable This space is reclaimed once checkpoint=enable. compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo", - "lz4" and "zstd" algorithm. + "lz4", "zstd" and "lzo-rle" algorithm. compress_log_size=%u Support configuring compress cluster size, the size will be 4KB * (1 << %u), 16KB is minimum size, also it's default size. diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index bb68d21e1f8c..d13c5c6a9787 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -127,3 +127,13 @@ config F2FS_FS_ZSTD default y help Support ZSTD compress algorithm, if unsure, say Y. + +config F2FS_FS_LZORLE + bool "LZO-RLE compression support" + depends on F2FS_FS_COMPRESSION + depends on F2FS_FS_LZO + select LZO_COMPRESS + select LZO_DECOMPRESS + default y + help + Support LZO-RLE compress algorithm, if unsure, say Y. diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 230ea7cd1510..c7c5a8f8a48c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -442,6 +442,31 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = { }; #endif +#ifdef CONFIG_F2FS_FS_LZO +#ifdef CONFIG_F2FS_FS_LZORLE +static int lzorle_compress_pages(struct compress_ctx *cc) +{ + int ret; + + ret = lzorle1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, + &cc->clen, cc->private); + if (ret != LZO_E_OK) { + printk_ratelimited("%sF2FS-fs (%s): lzo-rle compress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + return -EIO; + } + return 0; +} + +static const struct f2fs_compress_ops f2fs_lzorle_ops = { + .init_compress_ctx = lzo_init_compress_ctx, + .destroy_compress_ctx = lzo_destroy_compress_ctx, + .compress_pages = lzorle_compress_pages, + .decompress_pages = lzo_decompress_pages, +}; +#endif +#endif + static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #ifdef CONFIG_F2FS_FS_LZO &f2fs_lzo_ops, @@ -458,6 +483,11 @@ static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { #else NULL, #endif +#if defined(CONFIG_F2FS_FS_LZO) && defined(CONFIG_F2FS_FS_LZORLE) + &f2fs_lzorle_ops, +#else + NULL, +#endif }; bool f2fs_is_compress_backend_ready(struct inode *inode) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d098b94ca22d..0dab21e764d9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1281,6 +1281,7 @@ enum compress_algorithm_type { COMPRESS_LZO, COMPRESS_LZ4, COMPRESS_ZSTD, + COMPRESS_LZORLE, COMPRESS_MAX, }; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1b170fc48941..582bbf40c559 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -822,6 +822,9 @@ static int parse_options(struct super_block *sb, char *options) } else if (!strcmp(name, "zstd")) { F2FS_OPTION(sbi).compress_algorithm = COMPRESS_ZSTD; + } else if (!strcmp(name, "lzo-rle")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_LZORLE; } else { kfree(name); return -EINVAL; @@ -1415,6 +1418,9 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, case COMPRESS_ZSTD: algtype = "zstd"; break; + case COMPRESS_LZORLE: + algtype = "lzo-rle"; + break; } seq_printf(seq, ",compress_algorithm=%s", algtype); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 417a486f5c8a..757d3d6031e6 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -154,7 +154,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE); __print_symbolic(type, \ { COMPRESS_LZO, "LZO" }, \ { COMPRESS_LZ4, "LZ4" }, \ - { COMPRESS_ZSTD, "ZSTD" }) + { COMPRESS_ZSTD, "ZSTD" }, \ + { COMPRESS_LZORLE, "LZO-RLE" }) struct f2fs_sb_info; struct f2fs_io_info; -- cgit v1.2.3-59-g8ed1b From b4b10061ef98c583bcf82a4200703fbaa98c18dc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 31 Mar 2020 11:43:07 -0700 Subject: f2fs: refactor resize_fs to avoid meta updates in progress Sahitya raised an issue: - prevent meta updates while checkpoint is in progress allocate_segment_for_resize() can cause metapage updates if it requires to change the current node/data segments for resizing. Stop these meta updates when there is a checkpoint already in progress to prevent inconsistent CP data. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 ++- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 5 +- fs/f2fs/gc.c | 117 +++++++++++++++++++++++++------------------- fs/f2fs/super.c | 1 - include/trace/events/f2fs.h | 4 +- 6 files changed, 77 insertions(+), 58 deletions(-) (limited to 'include/trace') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 79e605f38f4f..620a386d82c1 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1559,7 +1559,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return 0; f2fs_warn(sbi, "Start checkpoint disabled!"); } - mutex_lock(&sbi->cp_mutex); + if (cpc->reason != CP_RESIZE) + mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || @@ -1628,7 +1629,8 @@ stop: f2fs_update_time(sbi, CP_TIME); trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: - mutex_unlock(&sbi->cp_mutex); + if (cpc->reason != CP_RESIZE) + mutex_unlock(&sbi->cp_mutex); return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1d96f733b1b7..338622069e42 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -194,6 +194,7 @@ enum { #define CP_DISCARD 0x00000010 #define CP_TRIMMED 0x00000020 #define CP_PAUSE 0x00000040 +#define CP_RESIZE 0x00000080 #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ @@ -1471,7 +1472,6 @@ struct f2fs_sb_info { unsigned int segs_per_sec; /* segments per section */ unsigned int secs_per_zone; /* sections per zone */ unsigned int total_sections; /* total section count */ - struct mutex resize_mutex; /* for resize exclusion */ unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 97cc95a4a8ed..f7de2a1da528 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3312,7 +3312,6 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) { struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); __u64 block_count; - int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -3324,9 +3323,7 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) sizeof(block_count))) return -EFAULT; - ret = f2fs_resize_fs(sbi, block_count); - - return ret; + return f2fs_resize_fs(sbi, block_count); } static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 28a8c79c8bdc..f3c45ec2a7e7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -1405,12 +1406,29 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } -static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, - unsigned int end) +static int free_segment_range(struct f2fs_sb_info *sbi, + unsigned int secs, bool gc_only) { - int type; - unsigned int segno, next_inuse; + unsigned int segno, next_inuse, start, end; + struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; + int gc_mode, gc_type; int err = 0; + int type; + + /* Force block allocation for GC */ + MAIN_SECS(sbi) -= secs; + start = MAIN_SECS(sbi) * sbi->segs_per_sec; + end = MAIN_SEGS(sbi) - 1; + + mutex_lock(&DIRTY_I(sbi)->seglist_lock); + for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) + if (SIT_I(sbi)->last_victim[gc_mode] >= start) + SIT_I(sbi)->last_victim[gc_mode] = 0; + + for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) + if (sbi->next_victim_seg[gc_type] >= start) + sbi->next_victim_seg[gc_type] = NULL_SEGNO; + mutex_unlock(&DIRTY_I(sbi)->seglist_lock); /* Move out cursegs from the target range */ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++) @@ -1423,18 +1441,24 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - down_write(&sbi->gc_lock); do_garbage_collect(sbi, segno, &gc_list, FG_GC); - up_write(&sbi->gc_lock); put_gc_inode(&gc_list); - if (get_valid_blocks(sbi, segno, true)) - return -EAGAIN; + if (!gc_only && get_valid_blocks(sbi, segno, true)) { + err = -EAGAIN; + goto out; + } + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + goto out; + } } + if (gc_only) + goto out; - err = f2fs_sync_fs(sbi->sb, 1); + err = f2fs_write_checkpoint(sbi, &cpc); if (err) - return err; + goto out; next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start); if (next_inuse <= end) { @@ -1442,6 +1466,8 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, next_inuse); f2fs_bug_on(sbi, 1); } +out: + MAIN_SECS(sbi) += secs; return err; } @@ -1487,6 +1513,7 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs; MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; + MAIN_SECS(sbi) += secs; FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks); @@ -1508,8 +1535,8 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) { __u64 old_block_count, shrunk_blocks; + struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; unsigned int secs; - int gc_mode, gc_type; int err = 0; __u32 rem; @@ -1544,10 +1571,27 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) return -EINVAL; } - freeze_bdev(sbi->sb->s_bdev); - shrunk_blocks = old_block_count - block_count; secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); + + /* stop other GC */ + if (!down_write_trylock(&sbi->gc_lock)) + return -EAGAIN; + + /* stop CP to protect MAIN_SEC in free_segment_range */ + f2fs_lock_op(sbi); + err = free_segment_range(sbi, secs, true); + f2fs_unlock_op(sbi); + up_write(&sbi->gc_lock); + if (err) + return err; + + set_sbi_flag(sbi, SBI_IS_RESIZEFS); + + freeze_super(sbi->sb); + down_write(&sbi->gc_lock); + mutex_lock(&sbi->cp_mutex); + spin_lock(&sbi->stat_lock); if (shrunk_blocks + valid_user_blocks(sbi) + sbi->current_reserved_blocks + sbi->unusable_block_count + @@ -1556,69 +1600,44 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) else sbi->user_block_count -= shrunk_blocks; spin_unlock(&sbi->stat_lock); - if (err) { - thaw_bdev(sbi->sb->s_bdev, sbi->sb); - return err; - } - - mutex_lock(&sbi->resize_mutex); - set_sbi_flag(sbi, SBI_IS_RESIZEFS); - - mutex_lock(&DIRTY_I(sbi)->seglist_lock); - - MAIN_SECS(sbi) -= secs; - - for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) - if (SIT_I(sbi)->last_victim[gc_mode] >= - MAIN_SECS(sbi) * sbi->segs_per_sec) - SIT_I(sbi)->last_victim[gc_mode] = 0; - - for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) - if (sbi->next_victim_seg[gc_type] >= - MAIN_SECS(sbi) * sbi->segs_per_sec) - sbi->next_victim_seg[gc_type] = NULL_SEGNO; - - mutex_unlock(&DIRTY_I(sbi)->seglist_lock); + if (err) + goto out_err; - err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec, - MAIN_SEGS(sbi) - 1); + err = free_segment_range(sbi, secs, false); if (err) - goto out; + goto recover_out; update_sb_metadata(sbi, -secs); err = f2fs_commit_super(sbi, false); if (err) { update_sb_metadata(sbi, secs); - goto out; + goto recover_out; } - mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, -secs); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); set_sbi_flag(sbi, SBI_IS_DIRTY); - mutex_unlock(&sbi->cp_mutex); - err = f2fs_sync_fs(sbi->sb, 1); + err = f2fs_write_checkpoint(sbi, &cpc); if (err) { - mutex_lock(&sbi->cp_mutex); update_fs_metadata(sbi, secs); - mutex_unlock(&sbi->cp_mutex); update_sb_metadata(sbi, secs); f2fs_commit_super(sbi, false); } -out: +recover_out: if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); - MAIN_SECS(sbi) += secs; spin_lock(&sbi->stat_lock); sbi->user_block_count += shrunk_blocks; spin_unlock(&sbi->stat_lock); } +out_err: + mutex_unlock(&sbi->cp_mutex); + up_write(&sbi->gc_lock); + thaw_super(sbi->sb); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); - mutex_unlock(&sbi->resize_mutex); - thaw_bdev(sbi->sb->s_bdev, sbi->sb); return err; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f6c5c7ec5a12..441eaaf9739c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3408,7 +3408,6 @@ try_onemore: init_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); - mutex_init(&sbi->resize_mutex); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 757d3d6031e6..4dbcdc6d2738 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -50,6 +50,7 @@ TRACE_DEFINE_ENUM(CP_RECOVERY); TRACE_DEFINE_ENUM(CP_DISCARD); TRACE_DEFINE_ENUM(CP_TRIMMED); TRACE_DEFINE_ENUM(CP_PAUSE); +TRACE_DEFINE_ENUM(CP_RESIZE); #define show_block_type(type) \ __print_symbolic(type, \ @@ -126,7 +127,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE); { CP_RECOVERY, "Recovery" }, \ { CP_DISCARD, "Discard" }, \ { CP_PAUSE, "Pause" }, \ - { CP_TRIMMED, "Trimmed" }) + { CP_TRIMMED, "Trimmed" }, \ + { CP_RESIZE, "Resize" }) #define show_fsync_cpreason(type) \ __print_symbolic(type, \ -- cgit v1.2.3-59-g8ed1b From 9c1223845a37ce09fd498b8c8ed061decff20eda Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 23 Apr 2020 18:03:06 +0800 Subject: f2fs: add compressed/gc data read IO stat in order to account data read IOs more accurately. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 + fs/f2fs/f2fs.h | 2 ++ fs/f2fs/gc.c | 2 ++ fs/f2fs/sysfs.c | 7 +++++++ include/trace/events/f2fs.h | 11 ++++++++--- 5 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4d871d27a85f..48a622b95b76 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2234,6 +2234,7 @@ submit_and_realloc: inc_page_count(sbi, F2FS_RD_DATA); f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE); ClearPageError(page); *last_block_in_bio = blkaddr; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 338622069e42..51863e4f5d4e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1149,6 +1149,8 @@ enum iostat_type { APP_READ_IO, /* app read IOs */ APP_MAPPED_READ_IO, /* app mapped read IOs */ FS_DATA_READ_IO, /* data read IOs */ + FS_GDATA_READ_IO, /* data read IOs from background gc */ + FS_CDATA_READ_IO, /* compressed data read IOs */ FS_NODE_READ_IO, /* node read IOs */ FS_META_READ_IO, /* meta read IOs */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f3c45ec2a7e7..5b95d5a146eb 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -740,6 +740,7 @@ got_it: f2fs_put_page(page, 1); f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE); return 0; put_encrypted_page: @@ -846,6 +847,7 @@ static int move_data_block(struct inode *inode, block_t bidx, } f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); + f2fs_update_iostat(fio.sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE); lock_page(mpage); if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index eaf8088548f0..a117ae1f9d5f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -803,6 +803,7 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, seq_printf(seq, "time: %-16llu\n", now); /* print app write IOs */ + seq_puts(seq, "[WRITE]\n"); seq_printf(seq, "app buffered: %-16llu\n", sbi->rw_iostat[APP_BUFFERED_IO]); seq_printf(seq, "app direct: %-16llu\n", @@ -829,6 +830,7 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, sbi->rw_iostat[FS_CP_META_IO]); /* print app read IOs */ + seq_puts(seq, "[READ]\n"); seq_printf(seq, "app buffered: %-16llu\n", sbi->rw_iostat[APP_BUFFERED_READ_IO]); seq_printf(seq, "app direct: %-16llu\n", @@ -839,12 +841,17 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq, /* print fs read IOs */ seq_printf(seq, "fs data: %-16llu\n", sbi->rw_iostat[FS_DATA_READ_IO]); + seq_printf(seq, "fs gc data: %-16llu\n", + sbi->rw_iostat[FS_GDATA_READ_IO]); + seq_printf(seq, "fs compr_data: %-16llu\n", + sbi->rw_iostat[FS_CDATA_READ_IO]); seq_printf(seq, "fs node: %-16llu\n", sbi->rw_iostat[FS_NODE_READ_IO]); seq_printf(seq, "fs meta: %-16llu\n", sbi->rw_iostat[FS_META_READ_IO]); /* print other IOs */ + seq_puts(seq, "[OTHER]\n"); seq_printf(seq, "fs discard: %-16llu\n", sbi->rw_iostat[FS_DISCARD]); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 4dbcdc6d2738..a8e2e83c91ce 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1840,6 +1840,8 @@ TRACE_EVENT(f2fs_iostat, __field(unsigned long long, app_rio) __field(unsigned long long, app_mrio) __field(unsigned long long, fs_drio) + __field(unsigned long long, fs_gdrio) + __field(unsigned long long, fs_cdrio) __field(unsigned long long, fs_nrio) __field(unsigned long long, fs_mrio) __field(unsigned long long, fs_discard) @@ -1864,6 +1866,8 @@ TRACE_EVENT(f2fs_iostat, __entry->app_rio = iostat[APP_READ_IO]; __entry->app_mrio = iostat[APP_MAPPED_READ_IO]; __entry->fs_drio = iostat[FS_DATA_READ_IO]; + __entry->fs_gdrio = iostat[FS_GDATA_READ_IO]; + __entry->fs_cdrio = iostat[FS_CDATA_READ_IO]; __entry->fs_nrio = iostat[FS_NODE_READ_IO]; __entry->fs_mrio = iostat[FS_META_READ_IO]; __entry->fs_discard = iostat[FS_DISCARD]; @@ -1875,15 +1879,16 @@ TRACE_EVENT(f2fs_iostat, "gc [data=%llu, node=%llu], " "cp [data=%llu, node=%llu, meta=%llu], " "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " - "fs [data=%llu, node=%llu, meta=%llu]", + "fs [data=%llu, (gc_data=%llu, compr_data=%llu), " + "node=%llu, meta=%llu]", show_dev(__entry->dev), __entry->app_wio, __entry->app_dio, __entry->app_bio, __entry->app_mio, __entry->fs_dio, __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, __entry->fs_cp_nio, __entry->fs_cp_mio, __entry->app_rio, __entry->app_drio, __entry->app_brio, - __entry->app_mrio, __entry->fs_drio, __entry->fs_nrio, - __entry->fs_mrio) + __entry->app_mrio, __entry->fs_drio, __entry->fs_gdrio, + __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio) ); #endif /* _TRACE_F2FS_H */ -- cgit v1.2.3-59-g8ed1b