From 48ec6328de6c153a64474d9e5b6ec95f20f4142b Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 12 Jul 2023 15:46:37 +0800 Subject: ubifs: Fix some kernel-doc comments Add description of @time and @flags in ubifs_update_time(). to silence the warnings: fs/ubifs/file.c:1383: warning: Function parameter or member 'time' not described in 'ubifs_update_time' fs/ubifs/file.c:1383: warning: Function parameter or member 'flags' not described in 'ubifs_update_time' Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=5848 Signed-off-by: Yang Li Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- fs/ubifs/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index e5382f0b2587..3345be5edb9f 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1375,6 +1375,9 @@ static inline int mctime_update_needed(const struct inode *inode, /** * ubifs_update_time - update time of inode. * @inode: inode to update + * @time: timespec structure to hold the current time value + * @flags: time updating control flag determines updating + * which time fields of @inode * * This function updates time of the inode. */ -- cgit v1.2.3-59-g8ed1b From f4a04c97fb3b7edd7694e725b7dcf0d6901ebcdd Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 18 Jul 2023 14:41:45 +0200 Subject: ubifs: Fix memory leak of bud->log_hash Ensure that the allocated bud->log_hash (if any) is freed in all cases when the bud itself is freed, to fix this leak caught by kmemleak: # keyctl add logon foo:bar data @s # echo clear > /sys/kernel/debug/kmemleak # mount -t ubifs /dev/ubi0_0 mnt -o auth_hash_name=sha256,auth_key=foo:bar # echo a > mnt/x # umount mnt # mount -t ubifs /dev/ubi0_0 mnt -o auth_hash_name=sha256,auth_key=foo:bar # umount mnt # sleep 5 # echo scan > /sys/kernel/debug/kmemleak # echo scan > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak unreferenced object 0xff... (size 128): comm "mount" backtrace: __kmalloc __ubifs_hash_get_desc+0x5d/0xe0 ubifs ubifs_replay_journal ubifs_mount ... Fixes: da8ef65f9573 ("ubifs: Authenticate replayed journal") Signed-off-by: Vincent Whitchurch Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- fs/ubifs/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index b08fb28d16b5..610dddc68eba 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -923,8 +923,10 @@ static void free_buds(struct ubifs_info *c) { struct ubifs_bud *bud, *n; - rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb) + rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb) { + kfree(bud->log_hash); kfree(bud); + } } /** @@ -1193,6 +1195,7 @@ static void destroy_journal(struct ubifs_info *c) bud = list_entry(c->old_buds.next, struct ubifs_bud, list); list_del(&bud->list); + kfree(bud->log_hash); kfree(bud); } ubifs_destroy_idx_gc(c); -- cgit v1.2.3-59-g8ed1b From 60f2f4a81d486348587a6901e744ca8f22dd9637 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Fri, 18 Aug 2023 17:18:48 +0800 Subject: ubifs: Fix missing error code err Fix smatch warning: fs/ubifs/journal.c:1610 ubifs_jnl_truncate() warn: missing error code 'err' Signed-off-by: Ferry Meng Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- fs/ubifs/journal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index ffc9beee7be6..e6cceeb8573b 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1607,6 +1607,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, ubifs_err(c, "bad data node (block %u, inode %lu)", blk, inode->i_ino); ubifs_dump_node(c, dn, dn_size); + err = -EUCLEAN; goto out_free; } -- cgit v1.2.3-59-g8ed1b From 4d18b5a57b16c21cf868369ca555068722c32b2d Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:34 +0800 Subject: ubi: fastmap: Fix missed ec updating after erasing old fastmap data block After running fsstress on ubifs for a long time, UBI(16384 blocks, fastmap takes 2 blocks) has an erase block with different erase counters displayed from two views: From ubiscan view: PEB 8031 has erase counter 31581 ========================================================= from to count min avg max --------------------------------------------------------- 0 .. 9: 0 0 0 0 10 .. 99: 0 0 0 0 100 .. 999: 16383 290 315 781 1000 .. 9999: 0 0 0 0 10000 .. 99999: 1 31581 31581 31581 100000 .. inf: 0 0 0 0 --------------------------------------------------------- Total : 16384 290 317 31581 From detailed_erase_block_info view: PEB 8031 has erase counter 7 physical_block_number erase_count 8030 421 8031 7 # mem info is different from disk info 8032 434 8033 425 8034 431 Following process missed updating erase counter in wl_entry(in memory): ubi_update_fastmap for (i = 1; i < new_fm->used_blocks; i++) // update fastmap data if (!tmp_e) if (old_fm && old_fm->e[i]) erase_block(ubi, old_fm->e[i]->pnum) ret = ubi_io_sync_erase(ubi, pnum, 0) ec = be64_to_cpu(ec_hdr->ec) ec += ret ec_hdr->ec = cpu_to_be64(ec) ubi_io_write_ec_hdr(ubi, pnum, ec_hdr) // ec is updated on flash // ec is not updated in old_fm->e[i] (in memory) Fix it by passing wl_enter into erase_block() and updating erase counter in erase_block(). Fixes: dbb7d2a88d2a ("UBI: Add fastmap core") Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 28c8151a0725..f8c230acc55e 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -1392,11 +1392,12 @@ out: /** * erase_block - Manually erase a PEB. * @ubi: UBI device object - * @pnum: PEB to be erased + * @e: the physical eraseblock to erase * - * Returns the new EC value on success, < 0 indicates an internal error. + * This function returns zero in case of success and a negative error code in + * case of failure. */ -static int erase_block(struct ubi_device *ubi, int pnum) +static int erase_block(struct ubi_device *ubi, struct ubi_wl_entry *e) { int ret; struct ubi_ec_hdr *ec_hdr; @@ -1406,7 +1407,7 @@ static int erase_block(struct ubi_device *ubi, int pnum) if (!ec_hdr) return -ENOMEM; - ret = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0); + ret = ubi_io_read_ec_hdr(ubi, e->pnum, ec_hdr, 0); if (ret < 0) goto out; else if (ret && ret != UBI_IO_BITFLIPS) { @@ -1414,7 +1415,7 @@ static int erase_block(struct ubi_device *ubi, int pnum) goto out; } - ret = ubi_io_sync_erase(ubi, pnum, 0); + ret = ubi_io_sync_erase(ubi, e->pnum, 0); if (ret < 0) goto out; @@ -1426,11 +1427,16 @@ static int erase_block(struct ubi_device *ubi, int pnum) } ec_hdr->ec = cpu_to_be64(ec); - ret = ubi_io_write_ec_hdr(ubi, pnum, ec_hdr); + ret = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); if (ret < 0) goto out; - ret = ec; + e->ec = ec; + spin_lock(&ubi->wl_lock); + if (e->ec > ubi->max_ec) + ubi->max_ec = e->ec; + spin_unlock(&ubi->wl_lock); + out: kfree(ec_hdr); return ret; @@ -1576,7 +1582,7 @@ int ubi_update_fastmap(struct ubi_device *ubi) if (!tmp_e) { if (old_fm && old_fm->e[i]) { - ret = erase_block(ubi, old_fm->e[i]->pnum); + ret = erase_block(ubi, old_fm->e[i]); if (ret < 0) { ubi_err(ubi, "could not erase old fastmap PEB"); @@ -1628,7 +1634,7 @@ int ubi_update_fastmap(struct ubi_device *ubi) if (old_fm) { /* no fresh anchor PEB was found, reuse the old one */ if (!tmp_e) { - ret = erase_block(ubi, old_fm->e[0]->pnum); + ret = erase_block(ubi, old_fm->e[0]); if (ret < 0) { ubi_err(ubi, "could not erase old anchor PEB"); @@ -1640,7 +1646,6 @@ int ubi_update_fastmap(struct ubi_device *ubi) goto err; } new_fm->e[0] = old_fm->e[0]; - new_fm->e[0]->ec = ret; old_fm->e[0] = NULL; } else { /* we've got a new anchor PEB, return the old one */ -- cgit v1.2.3-59-g8ed1b From 08a4267874164b2e9c8c50831acd466f47208acc Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:35 +0800 Subject: ubi: fastmap: erase_block: Get erase counter from wl_entry rather than flash Just like sync_erase() does, getting erase counter from wl_entry is faster than reading from flash. Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index f8c230acc55e..05ecdc049343 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -1399,36 +1399,27 @@ out: */ static int erase_block(struct ubi_device *ubi, struct ubi_wl_entry *e) { - int ret; + int err; struct ubi_ec_hdr *ec_hdr; - long long ec; + long long ec = e->ec; ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); if (!ec_hdr) return -ENOMEM; - ret = ubi_io_read_ec_hdr(ubi, e->pnum, ec_hdr, 0); - if (ret < 0) - goto out; - else if (ret && ret != UBI_IO_BITFLIPS) { - ret = -EINVAL; - goto out; - } - - ret = ubi_io_sync_erase(ubi, e->pnum, 0); - if (ret < 0) + err = ubi_io_sync_erase(ubi, e->pnum, 0); + if (err < 0) goto out; - ec = be64_to_cpu(ec_hdr->ec); - ec += ret; + ec += err; if (ec > UBI_MAX_ERASECOUNTER) { - ret = -EINVAL; + err = -EINVAL; goto out; } ec_hdr->ec = cpu_to_be64(ec); - ret = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); - if (ret < 0) + err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); + if (err < 0) goto out; e->ec = ec; @@ -1439,7 +1430,7 @@ static int erase_block(struct ubi_device *ubi, struct ubi_wl_entry *e) out: kfree(ec_hdr); - return ret; + return err; } /** -- cgit v1.2.3-59-g8ed1b From a033ab4fec5fd9194d1b6c0306efbdc75f70b142 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:36 +0800 Subject: ubi: fastmap: Allocate memory with GFP_NOFS in ubi_update_fastmap Function ubi_update_fastmap could be called in IO context, for example: ubifs_writepage do_writepage ubifs_jnl_write_data write_head ubifs_wbuf_write_nolock ubifs_leb_write ubi_leb_write ubi_eba_write_leb try_write_vid_and_data ubi_wl_get_peb ubi_update_fastmap erase_block So it's better to allocate memory with GFP_NOFS mode, in case waiting page writeback(dead loop). Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 05ecdc049343..d64bfb986d40 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -20,7 +20,7 @@ static inline unsigned long *init_seen(struct ubi_device *ubi) if (!ubi_dbg_chk_fastmap(ubi)) return NULL; - ret = bitmap_zalloc(ubi->peb_count, GFP_KERNEL); + ret = bitmap_zalloc(ubi->peb_count, GFP_NOFS); if (!ret) return ERR_PTR(-ENOMEM); @@ -105,7 +105,7 @@ static struct ubi_vid_io_buf *new_fm_vbuf(struct ubi_device *ubi, int vol_id) struct ubi_vid_io_buf *new; struct ubi_vid_hdr *vh; - new = ubi_alloc_vid_buf(ubi, GFP_KERNEL); + new = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!new) goto out; @@ -1403,7 +1403,7 @@ static int erase_block(struct ubi_device *ubi, struct ubi_wl_entry *e) struct ubi_ec_hdr *ec_hdr; long long ec = e->ec; - ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); if (!ec_hdr) return -ENOMEM; @@ -1459,7 +1459,7 @@ static int invalidate_fastmap(struct ubi_device *ubi) ubi->fm = NULL; ret = -ENOMEM; - fm = kzalloc(sizeof(*fm), GFP_KERNEL); + fm = kzalloc(sizeof(*fm), GFP_NOFS); if (!fm) goto out; @@ -1548,7 +1548,7 @@ int ubi_update_fastmap(struct ubi_device *ubi) return 0; } - new_fm = kzalloc(sizeof(*new_fm), GFP_KERNEL); + new_fm = kzalloc(sizeof(*new_fm), GFP_NOFS); if (!new_fm) { up_write(&ubi->fm_eba_sem); up_write(&ubi->work_sem); -- cgit v1.2.3-59-g8ed1b From c19286d70aaa361cdb073a68a1f66232c359e2fd Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:37 +0800 Subject: ubi: Replace erase_block() with sync_erase() Since erase_block() has same logic with sync_erase(), just replace it with sync_erase(), also rename 'sync_erase()' to 'ubi_sync_erase()'. Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap.c | 48 ++--------------------------------------------- drivers/mtd/ubi/ubi.h | 1 + drivers/mtd/ubi/wl.c | 9 ++++----- 3 files changed, 7 insertions(+), 51 deletions(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index d64bfb986d40..8f6052cb3217 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -1389,50 +1389,6 @@ out: return ret; } -/** - * erase_block - Manually erase a PEB. - * @ubi: UBI device object - * @e: the physical eraseblock to erase - * - * This function returns zero in case of success and a negative error code in - * case of failure. - */ -static int erase_block(struct ubi_device *ubi, struct ubi_wl_entry *e) -{ - int err; - struct ubi_ec_hdr *ec_hdr; - long long ec = e->ec; - - ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); - if (!ec_hdr) - return -ENOMEM; - - err = ubi_io_sync_erase(ubi, e->pnum, 0); - if (err < 0) - goto out; - - ec += err; - if (ec > UBI_MAX_ERASECOUNTER) { - err = -EINVAL; - goto out; - } - - ec_hdr->ec = cpu_to_be64(ec); - err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); - if (err < 0) - goto out; - - e->ec = ec; - spin_lock(&ubi->wl_lock); - if (e->ec > ubi->max_ec) - ubi->max_ec = e->ec; - spin_unlock(&ubi->wl_lock); - -out: - kfree(ec_hdr); - return err; -} - /** * invalidate_fastmap - destroys a fastmap. * @ubi: UBI device object @@ -1573,7 +1529,7 @@ int ubi_update_fastmap(struct ubi_device *ubi) if (!tmp_e) { if (old_fm && old_fm->e[i]) { - ret = erase_block(ubi, old_fm->e[i]); + ret = ubi_sync_erase(ubi, old_fm->e[i], 0); if (ret < 0) { ubi_err(ubi, "could not erase old fastmap PEB"); @@ -1625,7 +1581,7 @@ int ubi_update_fastmap(struct ubi_device *ubi) if (old_fm) { /* no fresh anchor PEB was found, reuse the old one */ if (!tmp_e) { - ret = erase_block(ubi, old_fm->e[0]); + ret = ubi_sync_erase(ubi, old_fm->e[0], 0); if (ret < 0) { ubi_err(ubi, "could not erase old anchor PEB"); diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index c8f1bd4fa100..2f0c0eacc013 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -902,6 +902,7 @@ int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, struct ubi_attach_info *ai_scan); /* wl.c */ +int ubi_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture); int ubi_wl_get_peb(struct ubi_device *ubi); int ubi_wl_put_peb(struct ubi_device *ubi, int vol_id, int lnum, int pnum, int torture); diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 26a214f016c1..0c78e09d7960 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -427,7 +427,7 @@ static int prot_queue_del(struct ubi_device *ubi, int pnum) } /** - * sync_erase - synchronously erase a physical eraseblock. + * ubi_sync_erase - synchronously erase a physical eraseblock. * @ubi: UBI device description object * @e: the physical eraseblock to erase * @torture: if the physical eraseblock has to be tortured @@ -435,8 +435,7 @@ static int prot_queue_del(struct ubi_device *ubi, int pnum) * This function returns zero in case of success and a negative error code in * case of failure. */ -static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, - int torture) +int ubi_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture) { int err; struct ubi_ec_hdr *ec_hdr; @@ -1094,7 +1093,7 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk) dbg_wl("erase PEB %d EC %d LEB %d:%d", pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum); - err = sync_erase(ubi, e, wl_wrk->torture); + err = ubi_sync_erase(ubi, e, wl_wrk->torture); if (!err) { spin_lock(&ubi->wl_lock); @@ -1749,7 +1748,7 @@ static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync ubi->lookuptbl[e->pnum] = e; if (sync) { - err = sync_erase(ubi, e, false); + err = ubi_sync_erase(ubi, e, false); if (err) goto out_free; -- cgit v1.2.3-59-g8ed1b From 8ff4e620ac93a5d332735e4f5a4ff31d80682b9a Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:38 +0800 Subject: ubi: fastmap: Use free pebs reserved for bad block handling If new bad PEBs occur, UBI firstly consumes ubi->beb_rsvd_pebs, and then ubi->avail_pebs, finally UBI becomes read-only if above two items are 0, which means that the amount of PEBs for user volumes is not effected. Besides, UBI reserves count of free PBEs is ubi->beb_rsvd_pebs while filling wl pool or getting free PEBs, but ubi->avail_pebs is not reserved. So ubi->beb_rsvd_pebs and ubi->avail_pebs have nothing to do with the usage of free PEBs, UBI can use all free PEBs. Commit 78d6d497a648 ("UBI: Move fastmap specific functions out of wl.c") has removed beb_rsvd_pebs checking while filling pool. Now, don't reserve ubi->beb_rsvd_pebs while filling wl_pool. This will fill more PEBs in pool and also reduce fastmap updating frequency. Also remove beb_rsvd_pebs checking in ubi_wl_get_fm_peb. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217787 Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap-wl.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index 863f571f1adb..4611a75f1241 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -76,7 +76,7 @@ struct ubi_wl_entry *ubi_wl_get_fm_peb(struct ubi_device *ubi, int anchor) { struct ubi_wl_entry *e = NULL; - if (!ubi->free.rb_node || (ubi->free_count - ubi->beb_rsvd_pebs < 1)) + if (!ubi->free.rb_node) goto out; if (anchor) @@ -100,28 +100,22 @@ out: /* * has_enough_free_count - whether ubi has enough free pebs to fill fm pools * @ubi: UBI device description object - * @is_wl_pool: whether UBI is filling wear leveling pool * * This helper function checks whether there are enough free pebs (deducted * by fastmap pebs) to fill fm_pool and fm_wl_pool, above rule works after * there is at least one of free pebs is filled into fm_wl_pool. - * For wear leveling pool, UBI should also reserve free pebs for bad pebs - * handling, because there maybe no enough free pebs for user volumes after - * producing new bad pebs. */ -static bool has_enough_free_count(struct ubi_device *ubi, bool is_wl_pool) +static bool has_enough_free_count(struct ubi_device *ubi) { int fm_used = 0; // fastmap non anchor pebs. - int beb_rsvd_pebs; if (!ubi->free.rb_node) return false; - beb_rsvd_pebs = is_wl_pool ? ubi->beb_rsvd_pebs : 0; if (ubi->fm_wl_pool.size > 0 && !(ubi->ro_mode || ubi->fm_disabled)) fm_used = ubi->fm_size / ubi->leb_size - 1; - return ubi->free_count - beb_rsvd_pebs > fm_used; + return ubi->free_count > fm_used; } /** @@ -159,7 +153,7 @@ void ubi_refill_pools(struct ubi_device *ubi) for (;;) { enough = 0; if (pool->size < pool->max_size) { - if (!has_enough_free_count(ubi, false)) + if (!has_enough_free_count(ubi)) break; e = wl_get_wle(ubi); @@ -172,7 +166,7 @@ void ubi_refill_pools(struct ubi_device *ubi) enough++; if (wl_pool->size < wl_pool->max_size) { - if (!has_enough_free_count(ubi, true)) + if (!has_enough_free_count(ubi)) break; e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); -- cgit v1.2.3-59-g8ed1b From a2ea69dac674df0fba59c66146a21145108a85ed Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:39 +0800 Subject: ubi: fastmap: Wait until there are enough free PEBs before filling pools Wait until there are enough free PEBs before filling pool/wl_pool, sometimes erase_worker is not scheduled in time, which causes two situations: A. There are few PEBs filled in pool, which makes ubi_update_fastmap is frequently called and leads first 64 PEBs are erased more times than other PEBs. So waiting free PEBs before filling pool reduces fastmap updating frequency and prolongs flash service life. B. In situation that space is nearly running out, ubi_refill_pools() cannot make sure pool and wl_pool are filled with free PEBs, caused by the delay of erase_worker. After this patch applied, there must exist free PEBs in pool after one call of ubi_update_fastmap. Besides, this patch is a preparetion for fixing large erase counter in fastmap data block and fixing lapsed wear leveling for first 64 PEBs. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217787 Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/eba.c | 3 --- drivers/mtd/ubi/fastmap-wl.c | 53 +++++++++++++++++++++++++++++++++++++++++--- drivers/mtd/ubi/fastmap.c | 6 +---- drivers/mtd/ubi/ubi.h | 5 ++++- drivers/mtd/ubi/wl.c | 14 ++++++++---- 5 files changed, 65 insertions(+), 16 deletions(-) diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 655ff41863e2..8d1f0e05892c 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -33,9 +33,6 @@ #include #include "ubi.h" -/* Number of physical eraseblocks reserved for atomic LEB change operation */ -#define EBA_RESERVED_PEBS 1 - /** * struct ubi_eba_entry - structure encoding a single LEB -> PEB association * @pnum: the physical eraseblock number attached to the LEB diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index 4611a75f1241..12854717915a 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -97,6 +97,46 @@ out: return e; } +/* + * wait_free_pebs_for_pool - wait until there enough free pebs + * @ubi: UBI device description object + * + * Wait and execute do_work until there are enough free pebs, fill pool + * as much as we can. This will reduce pool refilling times, which can + * reduce the fastmap updating frequency. + */ +static void wait_free_pebs_for_pool(struct ubi_device *ubi) +{ + struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool; + struct ubi_fm_pool *pool = &ubi->fm_pool; + int free, expect_free, executed; + /* + * There are at least following free pebs which reserved by UBI: + * 1. WL_RESERVED_PEBS[1] + * 2. EBA_RESERVED_PEBS[1] + * 3. fm pebs - 1: Twice fastmap size deducted by fastmap and fm_anchor + * 4. beb_rsvd_pebs: This value should be get under lock ubi->wl_lock + */ + int reserved = WL_RESERVED_PEBS + EBA_RESERVED_PEBS + + ubi->fm_size / ubi->leb_size - 1; + + do { + spin_lock(&ubi->wl_lock); + free = ubi->free_count; + free += pool->size - pool->used + wl_pool->size - wl_pool->used; + expect_free = reserved + ubi->beb_rsvd_pebs; + spin_unlock(&ubi->wl_lock); + + /* + * Break out if there are no works or work is executed failure, + * given the fact that erase_worker will schedule itself when + * -EBUSY is returned from mtd layer caused by system shutdown. + */ + if (do_work(ubi, &executed) || !executed) + break; + } while (free < expect_free); +} + /* * has_enough_free_count - whether ubi has enough free pebs to fill fm pools * @ubi: UBI device description object @@ -119,16 +159,23 @@ static bool has_enough_free_count(struct ubi_device *ubi) } /** - * ubi_refill_pools - refills all fastmap PEB pools. + * ubi_refill_pools_and_lock - refills all fastmap PEB pools and takes fm locks. * @ubi: UBI device description object */ -void ubi_refill_pools(struct ubi_device *ubi) +void ubi_refill_pools_and_lock(struct ubi_device *ubi) { struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool; struct ubi_fm_pool *pool = &ubi->fm_pool; struct ubi_wl_entry *e; int enough; + if (!ubi->ro_mode && !ubi->fm_disabled) + wait_free_pebs_for_pool(ubi); + + down_write(&ubi->fm_protect); + down_write(&ubi->work_sem); + down_write(&ubi->fm_eba_sem); + spin_lock(&ubi->wl_lock); return_unused_pool_pebs(ubi, wl_pool); @@ -204,7 +251,7 @@ static int produce_free_peb(struct ubi_device *ubi) while (!ubi->free.rb_node && ubi->works_count) { dbg_wl("do one work synchronously"); - err = do_work(ubi); + err = do_work(ubi, NULL); if (err) return err; diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 8f6052cb3217..2a728c31e6b8 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -1491,11 +1491,7 @@ int ubi_update_fastmap(struct ubi_device *ubi) struct ubi_fastmap_layout *new_fm, *old_fm; struct ubi_wl_entry *tmp_e; - down_write(&ubi->fm_protect); - down_write(&ubi->work_sem); - down_write(&ubi->fm_eba_sem); - - ubi_refill_pools(ubi); + ubi_refill_pools_and_lock(ubi); if (ubi->ro_mode || ubi->fm_disabled) { up_write(&ubi->fm_eba_sem); diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 2f0c0eacc013..423f66c91b1d 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -82,6 +82,9 @@ void ubi_err(const struct ubi_device *ubi, const char *fmt, ...); #define UBI_DFS_DIR_NAME "ubi%d" #define UBI_DFS_DIR_LEN (3 + 2 + 1) +/* Number of physical eraseblocks reserved for atomic LEB change operation */ +#define EBA_RESERVED_PEBS 1 + /* * Error codes returned by the I/O sub-system. * @@ -915,7 +918,7 @@ struct ubi_wl_entry *ubi_wl_get_fm_peb(struct ubi_device *ubi, int anchor); int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *used_e, int lnum, int torture); int ubi_is_erase_work(struct ubi_work *wrk); -void ubi_refill_pools(struct ubi_device *ubi); +void ubi_refill_pools_and_lock(struct ubi_device *ubi); int ubi_ensure_anchor_pebs(struct ubi_device *ubi); int ubi_bitflip_check(struct ubi_device *ubi, int pnum, int force_scrub); diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 0c78e09d7960..14edb65ce6a6 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -181,11 +181,13 @@ static void wl_entry_destroy(struct ubi_device *ubi, struct ubi_wl_entry *e) /** * do_work - do one pending work. * @ubi: UBI device description object + * @executed: whether there is one work is executed * * This function returns zero in case of success and a negative error code in - * case of failure. + * case of failure. If @executed is not NULL and there is one work executed, + * @executed is set as %1, otherwise @executed is set as %0. */ -static int do_work(struct ubi_device *ubi) +static int do_work(struct ubi_device *ubi, int *executed) { int err; struct ubi_work *wrk; @@ -203,9 +205,13 @@ static int do_work(struct ubi_device *ubi) if (list_empty(&ubi->works)) { spin_unlock(&ubi->wl_lock); up_read(&ubi->work_sem); + if (executed) + *executed = 0; return 0; } + if (executed) + *executed = 1; wrk = list_entry(ubi->works.next, struct ubi_work, list); list_del(&wrk->list); ubi->works_count -= 1; @@ -1685,7 +1691,7 @@ int ubi_thread(void *u) } spin_unlock(&ubi->wl_lock); - err = do_work(ubi); + err = do_work(ubi, NULL); if (err) { ubi_err(ubi, "%s: work failed with error code %d", ubi->bgt_name, err); @@ -2096,7 +2102,7 @@ static int produce_free_peb(struct ubi_device *ubi) spin_unlock(&ubi->wl_lock); dbg_wl("do one work synchronously"); - err = do_work(ubi); + err = do_work(ubi, NULL); spin_lock(&ubi->wl_lock); if (err) -- cgit v1.2.3-59-g8ed1b From 415e4723c4325464d489c1ef3335eb6679905471 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:40 +0800 Subject: ubi: fastmap: Remove unneeded break condition while filling pools Change pool filling stop condition. Commit d09e9a2bddba ("ubi: fastmap: Fix high cpu usage of ubi_bgt by making sure wl_pool not empty") reserves fastmap data PEBs after filling 1 PEB in wl_pool. Now wait_free_pebs_for_pool() makes enough free PEBs before filling pool, there will still be at least 1 PEB in pool and 1 PEB in wl_pool after doing ubi_refill_pools(). Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap-wl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index 12854717915a..7c4cfd80da31 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -142,8 +142,7 @@ static void wait_free_pebs_for_pool(struct ubi_device *ubi) * @ubi: UBI device description object * * This helper function checks whether there are enough free pebs (deducted - * by fastmap pebs) to fill fm_pool and fm_wl_pool, above rule works after - * there is at least one of free pebs is filled into fm_wl_pool. + * by fastmap pebs) to fill fm_pool and fm_wl_pool. */ static bool has_enough_free_count(struct ubi_device *ubi) { @@ -152,7 +151,7 @@ static bool has_enough_free_count(struct ubi_device *ubi) if (!ubi->free.rb_node) return false; - if (ubi->fm_wl_pool.size > 0 && !(ubi->ro_mode || ubi->fm_disabled)) + if (!ubi->ro_mode && !ubi->fm_disabled) fm_used = ubi->fm_size / ubi->leb_size - 1; return ubi->free_count > fm_used; -- cgit v1.2.3-59-g8ed1b From eada823e6a6fb856548f6e0c7a343cd5c41bb9d3 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:41 +0800 Subject: ubi: fastmap: may_reserve_for_fm: Don't reserve PEB if fm_anchor exists This is the part 1 to fix cyclically reusing single fastmap data PEBs. After running fsstress on UBIFS for a while, UBI (16384 blocks, fastmap takes 2 blocks) has an erase block(PEB: 8031) with big erase counter greater than any other pebs: ========================================================= from to count min avg max --------------------------------------------------------- 0 .. 9: 0 0 0 0 10 .. 99: 532 84 92 99 100 .. 999: 15787 100 147 229 1000 .. 9999: 64 4699 4765 4826 10000 .. 99999: 0 0 0 0 100000 .. inf: 1 272935 272935 272935 --------------------------------------------------------- Total : 16384 84 180 272935 Not like fm_anchor, there is no candidate PEBs for fastmap data area, so old fastmap data pebs will be reused after all free pebs are filled into pool/wl_pool: ubi_update_fastmap for (i = 1; i < new_fm->used_blocks; i++) erase_block(ubi, old_fm->e[i]->pnum) new_fm->e[i] = old_fm->e[i] According to wear leveling algorithm, UBI selects one small erase counter PEB from ubi->used and one big erase counter PEB from wl_pool, the reused fastmap data PEB is not in these trees. UBI won't schedule this PEB for wl even it is in ubi->used because wl algorithm expects small erase counter for used PEB. Don't reserve PEB for fastmap in may_reserve_for_fm() if fm_anchor already exists. Otherwise, when UBI is running out of free PEBs, the only one free PEB (pnum < 64) will be skipped and fastmap data will be written on the same old PEB. Fixes: dbb7d2a88d2a ("UBI: Add fastmap core") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217787 Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap-wl.c | 2 +- drivers/mtd/ubi/wl.c | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index 7c4cfd80da31..490514da1e00 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -521,7 +521,7 @@ static void ubi_fastmap_close(struct ubi_device *ubi) static struct ubi_wl_entry *may_reserve_for_fm(struct ubi_device *ubi, struct ubi_wl_entry *e, struct rb_root *root) { - if (e && !ubi->fm_disabled && !ubi->fm && + if (e && !ubi->fm_disabled && !ubi->fm && !ubi->fm_anchor && e->pnum < UBI_FM_MAX_START) e = rb_entry(rb_next(root->rb_node), struct ubi_wl_entry, u.rb); diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 14edb65ce6a6..40a1c306b8af 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -367,9 +367,12 @@ static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi, if (last->ec - first->ec < WL_FREE_MAX_DIFF) { e = rb_entry(root->rb_node, struct ubi_wl_entry, u.rb); - /* If no fastmap has been written and this WL entry can be used - * as anchor PEB, hold it back and return the second best - * WL entry such that fastmap can use the anchor PEB later. */ + /* + * If no fastmap has been written and fm_anchor is not + * reserved and this WL entry can be used as anchor PEB + * hold it back and return the second best WL entry such + * that fastmap can use the anchor PEB later. + */ e = may_reserve_for_fm(ubi, e, root); } else e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2); -- cgit v1.2.3-59-g8ed1b From 761893bd490b039258fda893c2a15fa59334c820 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:42 +0800 Subject: ubi: fastmap: Get wl PEB even ec beyonds the 'max' if free PEBs are run out This is the part 2 to fix cyclically reusing single fastmap data PEBs. Consider one situation, if there are four free PEBs for fm_anchor, pool, wl_pool and fastmap data PEB with erase counter 100, 100, 100, 5096 (ubi->beb_rsvd_pebs is 0). PEB with erase counter 5096 is always picked for fastmap data according to the realization of find_wl_entry(), since fastmap data PEB is not scheduled for wl, finally there are two PEBs (fm data) with great erase counter than other PEBS. Get wl PEB even its erase counter exceeds the 'max' in find_wl_entry() when free PEBs are run out after filling pools and fm data. Then the PEB with biggest erase conter is taken as wl PEB, it can be scheduled for wl. Fixes: dbb7d2a88d2a ("UBI: Add fastmap core") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217787 Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap-wl.c | 44 ++++++++++++++++++++++++++++++++++---------- drivers/mtd/ubi/wl.c | 16 ++++++++++------ 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index 490514da1e00..03c1f1016c0a 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -138,23 +138,44 @@ static void wait_free_pebs_for_pool(struct ubi_device *ubi) } /* - * has_enough_free_count - whether ubi has enough free pebs to fill fm pools + * left_free_count - returns the number of free pebs to fill fm pools * @ubi: UBI device description object * - * This helper function checks whether there are enough free pebs (deducted + * This helper function returns the number of free pebs (deducted * by fastmap pebs) to fill fm_pool and fm_wl_pool. */ -static bool has_enough_free_count(struct ubi_device *ubi) +static int left_free_count(struct ubi_device *ubi) { int fm_used = 0; // fastmap non anchor pebs. if (!ubi->free.rb_node) - return false; + return 0; if (!ubi->ro_mode && !ubi->fm_disabled) fm_used = ubi->fm_size / ubi->leb_size - 1; - return ubi->free_count > fm_used; + return ubi->free_count - fm_used; +} + +/* + * can_fill_pools - whether free PEBs will be left after filling pools + * @ubi: UBI device description object + * @free: current number of free PEBs + * + * Return %1 if there are still left free PEBs after filling pools, + * otherwise %0 is returned. + */ +static int can_fill_pools(struct ubi_device *ubi, int free) +{ + struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool; + struct ubi_fm_pool *pool = &ubi->fm_pool; + int pool_need = pool->max_size - pool->size + + wl_pool->max_size - wl_pool->size; + + if (free - pool_need < 1) + return 0; + + return 1; } /** @@ -199,7 +220,7 @@ void ubi_refill_pools_and_lock(struct ubi_device *ubi) for (;;) { enough = 0; if (pool->size < pool->max_size) { - if (!has_enough_free_count(ubi)) + if (left_free_count(ubi) <= 0) break; e = wl_get_wle(ubi); @@ -212,10 +233,13 @@ void ubi_refill_pools_and_lock(struct ubi_device *ubi) enough++; if (wl_pool->size < wl_pool->max_size) { - if (!has_enough_free_count(ubi)) + int left_free = left_free_count(ubi); + + if (left_free <= 0) break; - e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); + e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, + !can_fill_pools(ubi, left_free)); self_check_in_wl_tree(ubi, e, &ubi->free); rb_erase(&e->u.rb, &ubi->free); ubi->free_count--; @@ -355,12 +379,12 @@ static bool need_wear_leveling(struct ubi_device *ubi) if (!e) { if (!ubi->free.rb_node) return false; - e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); + e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0); ec = e->ec; } else { ec = e->ec; if (ubi->free.rb_node) { - e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); + e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0); ec = max(ec, e->ec); } } diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 40a1c306b8af..a357f3d27f2f 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -317,12 +317,14 @@ static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) * @ubi: UBI device description object * @root: the RB-tree where to look for * @diff: maximum possible difference from the smallest erase counter + * @pick_max: pick PEB even its erase counter beyonds 'min_ec + @diff' * * This function looks for a wear leveling entry with erase counter closest to * min + @diff, where min is the smallest erase counter. */ static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi, - struct rb_root *root, int diff) + struct rb_root *root, int diff, + int pick_max) { struct rb_node *p; struct ubi_wl_entry *e; @@ -336,9 +338,11 @@ static struct ubi_wl_entry *find_wl_entry(struct ubi_device *ubi, struct ubi_wl_entry *e1; e1 = rb_entry(p, struct ubi_wl_entry, u.rb); - if (e1->ec >= max) + if (e1->ec >= max) { + if (pick_max) + e = e1; p = p->rb_left; - else { + } else { p = p->rb_right; e = e1; } @@ -375,7 +379,7 @@ static struct ubi_wl_entry *find_mean_wl_entry(struct ubi_device *ubi, */ e = may_reserve_for_fm(ubi, e, root); } else - e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2); + e = find_wl_entry(ubi, root, WL_FREE_MAX_DIFF/2, 0); return e; } @@ -1048,7 +1052,7 @@ static int ensure_wear_leveling(struct ubi_device *ubi, int nested) * %UBI_WL_THRESHOLD. */ e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); - e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); + e2 = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0); if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) goto out_unlock; @@ -2079,7 +2083,7 @@ static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi) { struct ubi_wl_entry *e; - e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF); + e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0); self_check_in_wl_tree(ubi, e, &ubi->free); ubi->free_count--; ubi_assert(ubi->free_count >= 0); -- cgit v1.2.3-59-g8ed1b From 90e0be56144b064be1a816cbdd184d2a8be7061b Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:43 +0800 Subject: ubi: fastmap: Fix lapsed wear leveling for first 64 PEBs The anchor PEB must be picked from first 64 PEBs, these PEBs could have large erase counter greater than other PEBs especially when free space is nearly running out. The ubi_update_fastmap will be called as long as pool/wl_pool is empty, old anchor PEB is erased when updating fastmap. Given an UBI device with N PEBs, free PEBs is nearly running out and pool will be filled with 1 PEB every time ubi_update_fastmap invoked. So t=N/POOL_SIZE[1]/64 means that in worst case the erase counter of first 64 PEBs is t times greater than other PEBs in theory. After running fsstress for 24h, the erase counter statistics for two UBI devices shown as follow(CONFIG_MTD_UBI_WL_THRESHOLD=128): Device A(1024 PEBs, pool=50, wl_pool=25): ========================================================= from to count min avg max --------------------------------------------------------- 0 .. 9: 0 0 0 0 10 .. 99: 0 0 0 0 100 .. 999: 0 0 0 0 1000 .. 9999: 0 0 0 0 10000 .. 99999: 960 29224 29282 29362 100000 .. inf: 64 117897 117934 117940 --------------------------------------------------------- Total : 1024 29224 34822 117940 Device B(8192 PEBs, pool=256, wl_pool=128): ========================================================= from to count min avg max --------------------------------------------------------- 0 .. 9: 0 0 0 0 10 .. 99: 0 0 0 0 100 .. 999: 0 0 0 0 1000 .. 9999: 8128 2253 2321 2387 10000 .. 99999: 64 35387 35387 35388 100000 .. inf: 0 0 0 0 --------------------------------------------------------- Total : 8192 2253 2579 35388 The key point is reducing fastmap updating frequency by enlarging POOL_SIZE, so let UBI reserve ubi->fm_pool.max_size PEBs during attaching. Then POOL_SIZE will become ubi->fm_pool.max_size/2 even in free space running out case. Given an UBI device with 8192 PEBs(16384\8192\4096 is common large-capacity flash), t=8192/128/64=1. The fastmap updating will happen in either wl_pool or pool is empty, so setting fm_pool_rsv_cnt as ubi->fm_pool.max_size can fill wl_pool in full state. After pool reservation, running fsstress for 24h: Device A(1024 PEBs, pool=50, wl_pool=25): ========================================================= from to count min avg max --------------------------------------------------------- 0 .. 9: 0 0 0 0 10 .. 99: 0 0 0 0 100 .. 999: 0 0 0 0 1000 .. 9999: 0 0 0 0 10000 .. 99999: 1024 33801 33997 34056 100000 .. inf: 0 0 0 0 --------------------------------------------------------- Total : 1024 33801 33997 34056 Device B(8192 PEBs, pool=256, wl_pool=128): ========================================================= from to count min avg max --------------------------------------------------------- 0 .. 9: 0 0 0 0 10 .. 99: 0 0 0 0 100 .. 999: 0 0 0 0 1000 .. 9999: 8192 2205 2397 2460 10000 .. 99999: 0 0 0 0 100000 .. inf: 0 0 0 0 --------------------------------------------------------- Total : 8192 2205 2397 2460 The difference of erase counter between first 64 PEBs and others is under WL_FREE_MAX_DIFF(2*UBI_WL_THRESHOLD=2*128=256). Device A: 34056 - 33801 = 255 Device B: 2460 - 2205 = 255 Next patch will add a switch to control whether UBI needs to reserve PEBs for filling pool. Fixes: dbb7d2a88d2a ("UBI: Add fastmap core") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217787 Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/build.c | 1 + drivers/mtd/ubi/fastmap-wl.c | 2 +- drivers/mtd/ubi/ubi.h | 2 ++ drivers/mtd/ubi/wl.h | 6 ++++-- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 8ee51e49fced..66d7b6a16aad 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -951,6 +951,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, UBI_FM_MIN_POOL_SIZE); ubi->fm_wl_pool.max_size = ubi->fm_pool.max_size / 2; + ubi->fm_pool_rsv_cnt = ubi->fm_pool.max_size; ubi->fm_disabled = (!fm_autoconvert || disable_fm) ? 1 : 0; if (fm_debug) ubi_enable_dbg_chk_fastmap(ubi); diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index 03c1f1016c0a..2a9cc9413c42 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -118,7 +118,7 @@ static void wait_free_pebs_for_pool(struct ubi_device *ubi) * 4. beb_rsvd_pebs: This value should be get under lock ubi->wl_lock */ int reserved = WL_RESERVED_PEBS + EBA_RESERVED_PEBS + - ubi->fm_size / ubi->leb_size - 1; + ubi->fm_size / ubi->leb_size - 1 + ubi->fm_pool_rsv_cnt; do { spin_lock(&ubi->wl_lock); diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 423f66c91b1d..6e20a0fee72f 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -494,6 +494,7 @@ struct ubi_debug_info { * @fast_attach: non-zero if UBI was attached by fastmap * @fm_anchor: The next anchor PEB to use for fastmap * @fm_do_produce_anchor: If true produce an anchor PEB in wl + * @fm_pool_rsv_cnt: Number of reserved PEBs for filling pool/wl_pool * * @used: RB-tree of used physical eraseblocks * @erroneous: RB-tree of erroneous used physical eraseblocks @@ -604,6 +605,7 @@ struct ubi_device { int fast_attach; struct ubi_wl_entry *fm_anchor; int fm_do_produce_anchor; + int fm_pool_rsv_cnt; /* Wear-leveling sub-system's stuff */ struct rb_root used; diff --git a/drivers/mtd/ubi/wl.h b/drivers/mtd/ubi/wl.h index 5ebe374a08ae..7b6715ef6d4a 100644 --- a/drivers/mtd/ubi/wl.h +++ b/drivers/mtd/ubi/wl.h @@ -10,8 +10,10 @@ static bool need_wear_leveling(struct ubi_device *ubi); static void ubi_fastmap_close(struct ubi_device *ubi); static inline void ubi_fastmap_init(struct ubi_device *ubi, int *count) { - /* Reserve enough LEBs to store two fastmaps. */ - *count += (ubi->fm_size / ubi->leb_size) * 2; + if (ubi->fm_disabled) + ubi->fm_pool_rsv_cnt = 0; + /* Reserve enough LEBs to store two fastmaps and to fill pools. */ + *count += (ubi->fm_size / ubi->leb_size) * 2 + ubi->fm_pool_rsv_cnt; INIT_WORK(&ubi->fm_work, update_fastmap_work_fn); } static struct ubi_wl_entry *may_reserve_for_fm(struct ubi_device *ubi, -- cgit v1.2.3-59-g8ed1b From d4c48e5b58f12835de779f5425ef741c4d0fb53e Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:44 +0800 Subject: ubi: fastmap: Add module parameter to control reserving filling pool PEBs Adding 6th module parameter in 'mtd=xxx' to control whether or not reserving PEBs for filling pool/wl_pool. Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/build.c | 26 ++++++++++++++++++++++---- drivers/mtd/ubi/cdev.c | 3 ++- drivers/mtd/ubi/ubi.h | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 66d7b6a16aad..7d4ff1193db6 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -35,7 +35,7 @@ #define MTD_PARAM_LEN_MAX 64 /* Maximum number of comma-separated items in the 'mtd=' parameter */ -#define MTD_PARAM_MAX_COUNT 5 +#define MTD_PARAM_MAX_COUNT 6 /* Maximum value for the number of bad PEBs per 1024 PEBs */ #define MAX_MTD_UBI_BEB_LIMIT 768 @@ -54,6 +54,7 @@ * @vid_hdr_offs: VID header offset * @max_beb_per1024: maximum expected number of bad PEBs per 1024 PEBs * @enable_fm: enable fastmap when value is non-zero + * @need_resv_pool: reserve pool->max_size pebs when value is none-zero */ struct mtd_dev_param { char name[MTD_PARAM_LEN_MAX]; @@ -61,6 +62,7 @@ struct mtd_dev_param { int vid_hdr_offs; int max_beb_per1024; int enable_fm; + int need_resv_pool; }; /* Numbers of elements set in the @mtd_dev_param array */ @@ -825,6 +827,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id) * @vid_hdr_offset: VID header offset * @max_beb_per1024: maximum expected number of bad PEB per 1024 PEBs * @disable_fm: whether disable fastmap + * @need_resv_pool: whether reserve pebs to fill fm_pool * * This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in @@ -840,7 +843,8 @@ static int autoresize(struct ubi_device *ubi, int vol_id) * @ubi_devices_mutex. */ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, - int vid_hdr_offset, int max_beb_per1024, bool disable_fm) + int vid_hdr_offset, int max_beb_per1024, bool disable_fm, + bool need_resv_pool) { struct ubi_device *ubi; int i, err; @@ -951,7 +955,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, UBI_FM_MIN_POOL_SIZE); ubi->fm_wl_pool.max_size = ubi->fm_pool.max_size / 2; - ubi->fm_pool_rsv_cnt = ubi->fm_pool.max_size; + ubi->fm_pool_rsv_cnt = need_resv_pool ? ubi->fm_pool.max_size : 0; ubi->fm_disabled = (!fm_autoconvert || disable_fm) ? 1 : 0; if (fm_debug) ubi_enable_dbg_chk_fastmap(ubi); @@ -1274,7 +1278,8 @@ static int __init ubi_init(void) mutex_lock(&ubi_devices_mutex); err = ubi_attach_mtd_dev(mtd, p->ubi_num, p->vid_hdr_offs, p->max_beb_per1024, - p->enable_fm == 0); + p->enable_fm == 0, + p->need_resv_pool != 0); mutex_unlock(&ubi_devices_mutex); if (err < 0) { pr_err("UBI error: cannot attach mtd%d\n", @@ -1483,6 +1488,18 @@ static int ubi_mtd_param_parse(const char *val, const struct kernel_param *kp) } else p->enable_fm = 0; + token = tokens[5]; + if (token) { + int err = kstrtoint(token, 10, &p->need_resv_pool); + + if (err) { + pr_err("UBI error: bad value for need_resv_pool parameter: %s\n", + token); + return -EINVAL; + } + } else + p->need_resv_pool = 0; + mtd_devs += 1; return 0; } @@ -1496,6 +1513,7 @@ MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: mtd=max_size pebs during attach. If the value is non-zero, peb reservation is enabled. Default value is 0.\n" "\n" "Example 1: mtd=/dev/mtd0 - attach MTD device /dev/mtd0.\n" "Example 2: mtd=content,1984 mtd=4 - attach MTD device with name \"content\" using VID header offset 1984, and MTD device number 4 with default VID header offset.\n" diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index f43430b9c1e6..98aa00344b29 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -1041,7 +1041,8 @@ static long ctrl_cdev_ioctl(struct file *file, unsigned int cmd, */ mutex_lock(&ubi_devices_mutex); err = ubi_attach_mtd_dev(mtd, req.ubi_num, req.vid_hdr_offset, - req.max_beb_per1024, !!req.disable_fm); + req.max_beb_per1024, !!req.disable_fm, + false); mutex_unlock(&ubi_devices_mutex); if (err < 0) put_mtd_device(mtd); diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 6e20a0fee72f..a5ec566df0d7 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -944,7 +944,7 @@ int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, /* build.c */ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset, int max_beb_per1024, - bool disable_fm); + bool disable_fm, bool need_resv_pool); int ubi_detach_mtd_dev(int ubi_num, int anyway); struct ubi_device *ubi_get_device(int ubi_num); void ubi_put_device(struct ubi_device *ubi); -- cgit v1.2.3-59-g8ed1b From ac085cfe57df2cc1d7a5c4c5e64b8780c8ad452f Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 28 Aug 2023 14:38:45 +0800 Subject: ubi: fastmap: Add control in 'UBI_IOCATT' ioctl to reserve PEBs for filling pools This patch imports a new field 'need_resv_pool' in struct 'ubi_attach_req' to control whether or not reserving free PEBs for filling pool/wl_pool. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217787 Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/cdev.c | 2 +- include/uapi/mtd/ubi-user.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 98aa00344b29..0d8f04cf03c5 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -1042,7 +1042,7 @@ static long ctrl_cdev_ioctl(struct file *file, unsigned int cmd, mutex_lock(&ubi_devices_mutex); err = ubi_attach_mtd_dev(mtd, req.ubi_num, req.vid_hdr_offset, req.max_beb_per1024, !!req.disable_fm, - false); + !!req.need_resv_pool); mutex_unlock(&ubi_devices_mutex); if (err < 0) put_mtd_device(mtd); diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h index dcb179de4358..e1571603175e 100644 --- a/include/uapi/mtd/ubi-user.h +++ b/include/uapi/mtd/ubi-user.h @@ -248,6 +248,7 @@ enum { * @max_beb_per1024: maximum expected number of bad PEB per 1024 PEBs * @padding: reserved for future, not used, has to be zeroed * @disable_fm: whether disable fastmap + * @need_resv_pool: whether reserve free pebs for filling pool/wl_pool * * This data structure is used to specify MTD device UBI has to attach and the * parameters it has to use. The number which should be assigned to the new UBI @@ -293,7 +294,8 @@ struct ubi_attach_req { __s32 vid_hdr_offset; __s16 max_beb_per1024; __s8 disable_fm; - __s8 padding[9]; + __s8 need_resv_pool; + __s8 padding[8]; }; /* -- cgit v1.2.3-59-g8ed1b From d81efd66106c03771ffc8637855a6ec24caa6350 Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 5 Sep 2023 18:12:22 +0800 Subject: ubifs: fix possible dereference after free 'old_idx' could be dereferenced after free via 'rb_link_node' function call. Fixes: b5fda08ef213 ("ubifs: Fix memleak when insert_old_idx() failed") Co-developed-by: Ivanov Mikhail Signed-off-by: Konstantin Meskhidze Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- fs/ubifs/tnc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 6b7d95b65f4b..f4728e65d1bd 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -65,6 +65,7 @@ static void do_insert_old_idx(struct ubifs_info *c, else { ubifs_err(c, "old idx added twice!"); kfree(old_idx); + return; } } rb_link_node(&old_idx->rb, parent, p); -- cgit v1.2.3-59-g8ed1b From d07cec9c238ae8fc6c1a9f3f5d30a2f8ec6cdc71 Mon Sep 17 00:00:00 2001 From: ZhaoLong Wang Date: Thu, 21 Sep 2023 10:01:42 +0800 Subject: ubi: block: Fix use-after-free in ubiblock_cleanup The following BUG is reported when a ubiblock is removed: ================================================================== BUG: KASAN: slab-use-after-free in ubiblock_cleanup+0x88/0xa0 [ubi] Read of size 4 at addr ffff88810c8f3804 by task ubiblock/1716 CPU: 5 PID: 1716 Comm: ubiblock Not tainted 6.6.0-rc2+ #135 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014 Call Trace: dump_stack_lvl+0x37/0x50 print_report+0xd0/0x620 kasan_report+0xb6/0xf0 ubiblock_cleanup+0x88/0xa0 [ubi] ubiblock_remove+0x121/0x190 [ubi] vol_cdev_ioctl+0x355/0x630 [ubi] __x64_sys_ioctl+0xc7/0x100 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 RIP: 0033:0x7f08d7445577 Code: b3 66 90 48 8b 05 11 89 2c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e1 8 RSP: 002b:00007ffde05a3018 EFLAGS: 00000206 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000ffffffff RCX: 00007f08d7445577 RDX: 0000000000000000 RSI: 0000000000004f08 RDI: 0000000000000003 RBP: 0000000000816010 R08: 00000000008163a7 R09: 0000000000000000 R10: 0000000000000003 R11: 0000000000000206 R12: 0000000000000003 R13: 00007ffde05a3130 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 1715: kasan_save_stack+0x22/0x50 kasan_set_track+0x25/0x30 __kasan_kmalloc+0x7f/0x90 __alloc_disk_node+0x40/0x2b0 __blk_mq_alloc_disk+0x3e/0xb0 ubiblock_create+0x2ba/0x620 [ubi] vol_cdev_ioctl+0x581/0x630 [ubi] __x64_sys_ioctl+0xc7/0x100 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Freed by task 0: kasan_save_stack+0x22/0x50 kasan_set_track+0x25/0x30 kasan_save_free_info+0x2b/0x50 __kasan_slab_free+0x10e/0x190 __kmem_cache_free+0x96/0x220 bdev_free_inode+0xa4/0xf0 rcu_core+0x496/0xec0 __do_softirq+0xeb/0x384 The buggy address belongs to the object at ffff88810c8f3800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 4 bytes inside of freed 1024-byte region [ffff88810c8f3800, ffff88810c8f3c00) The buggy address belongs to the physical page: page:00000000d03de848 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10c8f0 head:00000000d03de848 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0x200000000000840(slab|head|node=0|zone=2) page_type: 0xffffffff() raw: 0200000000000840 ffff888100042dc0 ffffea0004244400 dead000000000002 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88810c8f3700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88810c8f3780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88810c8f3800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88810c8f3880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88810c8f3900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fix it by using a local variable to record the gendisk ID. Fixes: 77567b25ab9f ("ubi: use blk_mq_alloc_disk and blk_cleanup_disk") Signed-off-by: ZhaoLong Wang Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/block.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 437c5b83ffe5..309a42aeaa4c 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -447,13 +447,15 @@ out_unlock: static void ubiblock_cleanup(struct ubiblock *dev) { + int id = dev->gd->first_minor; + /* Stop new requests to arrive */ del_gendisk(dev->gd); /* Finally destroy the blk queue */ dev_info(disk_to_dev(dev->gd), "released"); put_disk(dev->gd); blk_mq_free_tag_set(&dev->tag_set); - idr_remove(&ubiblock_minor_idr, dev->gd->first_minor); + idr_remove(&ubiblock_minor_idr, id); } int ubiblock_remove(struct ubi_volume_info *vi) -- cgit v1.2.3-59-g8ed1b From 75690493591fe283e4c92a3ba7c4420e9858abdb Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Sat, 23 Sep 2023 11:28:59 +0800 Subject: ubifs: ubifs_link: Fix wrong name len calculating when UBIFS is encrypted The length of dentry name is calculated after the raw name is encrypted, except for ubifs_link(), which could make the size of dir underflow. Here is a reproducer: touch $TMP/file mkdir $TMP/dir stat $TMP/dir for i in $(seq 1 8) do ln $TMP/file $TMP/dir/$i unlink $TMP/dir/$i done stat $TMP/dir The size of dir will be underflow(-96). Fix it by calculating dentry name's length after the name is encrypted. Fixes: f4f61d2cc6d8 ("ubifs: Implement encrypted filenames") Reported-by: Roland Ruckerbauer Link: https://lore.kernel.org/linux-mtd/1638777819.2925845.1695222544742.JavaMail.zimbra@robart.cc/T/#u Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger --- fs/ubifs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 2f48c58d47cd..5dc1ac4d826d 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -724,7 +724,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, struct inode *inode = d_inode(old_dentry); struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_inode *dir_ui = ubifs_inode(dir); - int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); + int err, sz_change; struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, .dirtied_ino_d = ALIGN(ui->data_len, 8) }; struct fscrypt_name nm; @@ -748,6 +748,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, if (err) return err; + sz_change = CALC_DENT_SIZE(fname_len(&nm)); + err = dbg_check_synced_i_size(c, inode); if (err) goto out_fname; -- cgit v1.2.3-59-g8ed1b