about | summary | refs | log | tree | commit | diff | stats
path: root/fs/f2fs/segment.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs/segment.c')
-rw-r--r-- fs/f2fs/segment.c 1778
1 files changed, 1098 insertions, 680 deletions
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index cf0eb002cfd4..acf3d3fa4363 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -9,18 +9,20 @@
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/sched/mm.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
+#include <linux/random.h>
#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "gc.h"
-#include "trace.h"
+#include "iostat.h"
#include <trace/events/f2fs.h>
#define __reverse_ffz(x) __reverse_ffs(~(x))
@@ -28,7 +30,7 @@
static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
-static struct kmem_cache *inmem_entry_slab;
+static struct kmem_cache *revoke_entry_slab;
static unsigned long __reverse_ulong(unsigned char *str)
{
@@ -172,9 +174,9 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
- if (test_opt(sbi, LFS))
+ if (f2fs_lfs_mode(sbi))
return false;
- if (sbi->gc_mode == GC_URGENT)
+ if (sbi->gc_mode == GC_URGENT_HIGH)
return true;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return true;
@@ -183,300 +185,183 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}
-void f2fs_register_inmem_page(struct inode *inode, struct page *page)
+void f2fs_abort_atomic_write(struct inode *inode, bool clean) /* tear down the atomic-write state of @inode; @clean additionally truncates its page cache */
{
- struct inmem_pages *new;
-
- f2fs_trace_pid(page);
-
- f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
-
- new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
-
- /* add atomic page indices to the list */
- new->page = page;
- INIT_LIST_HEAD(&new->list);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
- /* increase reference count with clean state */
- get_page(page);
- mutex_lock(&F2FS_I(inode)->inmem_lock);
- list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
- inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
- mutex_unlock(&F2FS_I(inode)->inmem_lock);
+ if (!f2fs_is_atomic_file(inode)) /* nothing to abort unless the inode is an atomic file */
+ return;
- trace_f2fs_register_inmem_page(page, INMEM);
+ if (clean) /* caller asked for the cached pages to be dropped as well */
+ truncate_inode_pages_final(inode->i_mapping);
+ clear_inode_flag(fi->cow_inode, FI_COW_FILE); /* detach the COW inode: clear its flag, ... */
+ iput(fi->cow_inode); /* ... drop a reference on it, ... */
+ fi->cow_inode = NULL; /* ... and forget the pointer */
+ release_atomic_write_cnt(inode);
+ clear_inode_flag(inode, FI_ATOMIC_FILE); /* from here on f2fs_is_atomic_file() no longer matches this inode */
+ stat_dec_atomic_inode(inode);
}
-static int __revoke_inmem_pages(struct inode *inode,
- struct list_head *head, bool drop, bool recover,
- bool trylock)
+static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, /* swap the block address stored for page @index of @inode */
+ block_t new_addr, block_t *old_addr, bool recover) /* @recover: false = commit (fills *@old_addr), true = revoke (@old_addr unused) */
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct inmem_pages *cur, *tmp;
- int err = 0;
-
- list_for_each_entry_safe(cur, tmp, head, list) {
- struct page *page = cur->page;
-
- if (drop)
- trace_f2fs_commit_inmem_page(page, INMEM_DROP);
-
- if (trylock) {
- /*
- * to avoid deadlock in between page lock and
- * inmem_lock.
- */
- if (!trylock_page(page))
- continue;
- } else {
- lock_page(page);
- }
-
- f2fs_wait_on_page_writeback(page, DATA, true, true);
-
- if (recover) {
- struct dnode_of_data dn;
- struct node_info ni;
+ struct dnode_of_data dn;
+ struct node_info ni;
+ int err;
- trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
retry:
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_get_dnode_of_data(&dn, page->index,
- LOOKUP_NODE);
- if (err) {
- if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- cond_resched();
- goto retry;
- }
- err = -EAGAIN;
- goto next;
- }
-
- err = f2fs_get_node_info(sbi, dn.nid, &ni);
- if (err) {
- f2fs_put_dnode(&dn);
- return err;
- }
-
- if (cur->old_addr == NEW_ADDR) {
- f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
- f2fs_update_data_blkaddr(&dn, NEW_ADDR);
- } else
- f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
- cur->old_addr, ni.version, true, true);
- f2fs_put_dnode(&dn);
- }
-next:
- /* we don't need to invalidate this in the sccessful status */
- if (drop || recover) {
- ClearPageUptodate(page);
- clear_cold_data(page);
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE_RA); /* find the dnode that holds @index */
+ if (err) {
+ if (err == -ENOMEM) { /* only -ENOMEM is retried; any other error is returned */
+ f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
+ goto retry;
}
- f2fs_clear_page_private(page);
- f2fs_put_page(page, 1);
-
- list_del(&cur->list);
- kmem_cache_free(inmem_entry_slab, cur);
- dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
+ return err;
}
- return err;
-}
-void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
-{
- struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
- struct inode *inode;
- struct f2fs_inode_info *fi;
- unsigned int count = sbi->atomic_files;
- unsigned int looped = 0;
-next:
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (list_empty(head)) {
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
- return;
+ err = f2fs_get_node_info(sbi, dn.nid, &ni, false); /* ni.version is passed to f2fs_replace_block() below */
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
}
- fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
- inode = igrab(&fi->vfs_inode);
- if (inode)
- list_move_tail(&fi->inmem_ilist, head);
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
- if (inode) {
- if (gc_failure) {
- if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
- goto skip;
+ if (recover) { /* revoke path: @new_addr is the address saved at commit time */
+ /* dn.data_blkaddr is always valid */
+ if (!__is_valid_data_blkaddr(new_addr)) {
+ if (new_addr == NULL_ADDR) /* NOTE(review): presumably no block existed before the commit, so its count is dropped - confirm */
+ dec_valid_block_count(sbi, inode, 1);
+ f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
+ f2fs_update_data_blkaddr(&dn, new_addr);
+ } else {
+ f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
+ new_addr, ni.version, true, true);
}
- set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
- f2fs_drop_inmem_pages(inode);
-skip:
- iput(inode);
- }
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- cond_resched();
- if (gc_failure) {
- if (++looped >= count)
- return;
- }
- goto next;
-}
-
-void f2fs_drop_inmem_pages(struct inode *inode)
-{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_inode_info *fi = F2FS_I(inode);
+ } else { /* commit path: install @new_addr and report what it replaced */
+ blkcnt_t count = 1;
- while (!list_empty(&fi->inmem_pages)) {
- mutex_lock(&fi->inmem_lock);
- __revoke_inmem_pages(inode, &fi->inmem_pages,
- true, false, true);
- mutex_unlock(&fi->inmem_lock);
+ *old_addr = dn.data_blkaddr; /* handed back so the caller can revoke later */
+ f2fs_truncate_data_blocks_range(&dn, 1);
+ dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count); /* one block leaves the COW inode's count ... */
+ inc_valid_block_count(sbi, inode, &count); /* ... and is added to @inode's; the return value is not checked here */
+ f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
+ ni.version, true, false);
}
- fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
-
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (!list_empty(&fi->inmem_ilist))
- list_del_init(&fi->inmem_ilist);
- if (f2fs_is_atomic_file(inode)) {
- clear_inode_flag(inode, FI_ATOMIC_FILE);
- sbi->atomic_files--;
- }
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ f2fs_put_dnode(&dn);
+ return 0;
}
-void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
+static void __complete_revoke_list(struct inode *inode, struct list_head *head, /* free every revoke_entry on @head */
+ bool revoke) /* when set, each entry's saved old address is restored before it is freed */
{
- struct f2fs_inode_info *fi = F2FS_I(inode);
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct list_head *head = &fi->inmem_pages;
- struct inmem_pages *cur = NULL;
-
- f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));
+ struct revoke_entry *cur, *tmp;
- mutex_lock(&fi->inmem_lock);
- list_for_each_entry(cur, head, list) {
- if (cur->page == page)
- break;
+ list_for_each_entry_safe(cur, tmp, head, list) { /* _safe variant: entries are unlinked while iterating */
+ if (revoke)
+ __replace_atomic_write_block(inode, cur->index, /* return value is ignored here */
+ cur->old_addr, NULL, true);
+ list_del(&cur->list);
+ kmem_cache_free(revoke_entry_slab, cur);
}
-
- f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
- list_del(&cur->list);
- mutex_unlock(&fi->inmem_lock);
-
- dec_page_count(sbi, F2FS_INMEM_PAGES);
- kmem_cache_free(inmem_entry_slab, cur);
-
- ClearPageUptodate(page);
- f2fs_clear_page_private(page);
- f2fs_put_page(page, 0);
-
- trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}
-static int __f2fs_commit_inmem_pages(struct inode *inode)
+static int __f2fs_commit_atomic_write(struct inode *inode) /* move every valid block address found in the COW inode over to @inode */
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- struct inmem_pages *cur, *tmp;
- struct f2fs_io_info fio = {
- .sbi = sbi,
- .ino = inode->i_ino,
- .type = DATA,
- .op = REQ_OP_WRITE,
- .op_flags = REQ_SYNC | REQ_PRIO,
- .io_type = FS_DATA_IO,
- };
+ struct inode *cow_inode = fi->cow_inode;
+ struct revoke_entry *new;
struct list_head revoke_list;
- bool submit_bio = false;
- int err = 0;
+ block_t blkaddr;
+ struct dnode_of_data dn;
+ pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); /* number of pages covered by i_size */
+ pgoff_t off = 0, blen, index;
+ int ret = 0, i;
INIT_LIST_HEAD(&revoke_list);
- list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
- struct page *page = cur->page;
+ while (len) { /* walk the COW inode one dnode at a time */
+ blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len); /* step size used if the lookup below hits -ENOENT */
- lock_page(page);
- if (page->mapping == inode->i_mapping) {
- trace_f2fs_commit_inmem_page(page, INMEM);
+ set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
+ ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
+ if (ret && ret != -ENOENT) {
+ goto out;
+ } else if (ret == -ENOENT) { /* a missing dnode is not an error: nothing to move in this range */
+ ret = 0;
+ if (dn.max_level == 0) /* NOTE(review): presumably nothing can exist past this point - confirm */
+ goto out;
+ goto next;
+ }
- f2fs_wait_on_page_writeback(page, DATA, true, true);
+ blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode), /* actual slot count of the node page just found */
+ len);
+ index = off;
+ for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
+ blkaddr = f2fs_data_blkaddr(&dn);
- set_page_dirty(page);
- if (clear_page_dirty_for_io(page)) {
- inode_dec_dirty_pages(inode);
- f2fs_remove_dirty_inode(inode);
+ if (!__is_valid_data_blkaddr(blkaddr)) { /* slot holds no valid data block address: skip it */
+ continue;
+ } else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ f2fs_put_dnode(&dn);
+ ret = -EFSCORRUPTED;
+ f2fs_handle_error(sbi,
+ ERROR_INVALID_BLKADDR);
+ goto out;
}
-retry:
- fio.page = page;
- fio.old_blkaddr = NULL_ADDR;
- fio.encrypted_page = NULL;
- fio.need_lock = LOCK_DONE;
- err = f2fs_do_write_data_page(&fio);
- if (err) {
- if (err == -ENOMEM) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- cond_resched();
- goto retry;
- }
- unlock_page(page);
- break;
+
+ new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS, /* result is used below without a NULL check */
+ true, NULL);
+
+ ret = __replace_atomic_write_block(inode, index, blkaddr, /* commit mode (recover == false): the replaced address lands in new->old_addr */
+ &new->old_addr, false);
+ if (ret) {
+ f2fs_put_dnode(&dn);
+ kmem_cache_free(revoke_entry_slab, new);
+ goto out;
}
- /* record old blkaddr for revoking */
- cur->old_addr = fio.old_blkaddr;
- submit_bio = true;
+
+ f2fs_update_data_blkaddr(&dn, NULL_ADDR); /* the COW inode's slot no longer references the block */
+ new->index = index;
+ list_add_tail(&new->list, &revoke_list); /* remembered so the swap can be undone if a later step fails */
}
- unlock_page(page);
- list_move_tail(&cur->list, &revoke_list);
+ f2fs_put_dnode(&dn);
+next:
+ off += blen;
+ len -= blen;
}
- if (submit_bio)
- f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
-
- if (err) {
- /*
- * try to revoke all committed pages, but still we could fail
- * due to no memory or other reason, if that happened, EAGAIN
- * will be returned, which means in such case, transaction is
- * already not integrity, caller should use journal to do the
- * recovery or rewrite & commit last transaction. For other
- * error number, revoking was done by filesystem itself.
- */
- err = __revoke_inmem_pages(inode, &revoke_list,
- false, true, false);
+out:
+ if (ret) /* the whole atomic_write_cnt is booked as revoked on failure, as committed otherwise */
+ sbi->revoked_atomic_block += fi->atomic_write_cnt;
+ else
+ sbi->committed_atomic_block += fi->atomic_write_cnt;
- /* drop all uncommitted pages */
- __revoke_inmem_pages(inode, &fi->inmem_pages,
- true, false, false);
- } else {
- __revoke_inmem_pages(inode, &revoke_list,
- false, false, false);
- }
+ __complete_revoke_list(inode, &revoke_list, ret ? true : false); /* always frees the list; on error the recorded swaps are undone first */
- return err;
+ return ret;
}
-int f2fs_commit_inmem_pages(struct inode *inode)
+int f2fs_commit_atomic_write(struct inode *inode) /* write back @inode's dirty pages, then commit its atomic write under i_gc_rwsem[WRITE] and f2fs_lock_op() */
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
int err;
- f2fs_balance_fs(sbi, true);
-
- down_write(&fi->i_gc_rwsem[WRITE]);
+ err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); /* whole-file range; done before any lock is taken */
+ if (err)
+ return err;
+ f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
f2fs_lock_op(sbi);
- set_inode_flag(inode, FI_ATOMIC_COMMIT);
- mutex_lock(&fi->inmem_lock);
- err = __f2fs_commit_inmem_pages(inode);
- mutex_unlock(&fi->inmem_lock);
-
- clear_inode_flag(inode, FI_ATOMIC_COMMIT);
+ err = __f2fs_commit_atomic_write(inode); /* its result is returned to the caller unchanged */
f2fs_unlock_op(sbi);
- up_write(&fi->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
return err;
}
@@ -489,12 +374,12 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
}
/* balance_fs_bg is able to be pending */
if (need && excess_cached_nats(sbi))
- f2fs_balance_fs_bg(sbi);
+ f2fs_balance_fs_bg(sbi, false);
if (!f2fs_is_checkpoint_ready(sbi))
return;
@@ -504,12 +389,49 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
* dir/node pages without enough free segments.
*/
if (has_not_enough_free_secs(sbi, 0, 0)) {
- down_write(&sbi->gc_lock);
- f2fs_gc(sbi, false, false, NULL_SEGNO);
+ if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
+ sbi->gc_thread->f2fs_gc_task) {
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ wake_up(&sbi->gc_thread->gc_wait_queue_head);
+ io_schedule();
+ finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+ } else {
+ struct f2fs_gc_control gc_control = {
+ .victim_segno = NULL_SEGNO,
+ .init_gc_type = BG_GC,
+ .no_bg_gc = true,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = false,
+ .nr_free_secs = 1 };
+ f2fs_down_write(&sbi->gc_lock);
+ f2fs_gc(sbi, &gc_control);
+ }
}
}
-void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
+static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi) /* true when dirty page counts exceed the per-type or the combined limit */
+{
+ int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2; /* larger multiplier while cp_rwsem is held */
+ unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
+ unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
+ unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
+ unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
+ unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
+ unsigned int threshold = sbi->blocks_per_seg * factor * /* per-type limit, scaled by segment size */
+ DEFAULT_DIRTY_THRESHOLD;
+ unsigned int global_threshold = threshold * 3 / 2; /* combined limit: 1.5x the per-type limit (integer math) */
+
+ if (dents >= threshold || qdata >= threshold || /* any single type at or above its limit is enough */
+ nodes >= threshold || meta >= threshold ||
+ imeta >= threshold)
+ return true;
+ return dents + qdata + nodes + meta + imeta > global_threshold; /* otherwise compare the sum with the combined limit (strictly greater) */
+}
+
+void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
{
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return;
@@ -527,47 +449,44 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
else
f2fs_build_free_nids(sbi, false, false);
- if (!is_idle(sbi, REQ_TIME) &&
- (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
+ if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
+ excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
+ goto do_sync;
+
+ /* there is background inflight IO or foreground operation recently */
+ if (is_inflight_io(sbi, REQ_TIME) ||
+ (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
return;
+ /* exceed periodical checkpoint timeout threshold */
+ if (f2fs_time_over(sbi, CP_TIME))
+ goto do_sync;
+
/* checkpoint is the only way to shrink partial cached entries */
- if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
- !f2fs_available_free_memory(sbi, INO_ENTRIES) ||
- excess_prefree_segs(sbi) ||
- excess_dirty_nats(sbi) ||
- excess_dirty_nodes(sbi) ||
- f2fs_time_over(sbi, CP_TIME)) {
- if (test_opt(sbi, DATA_FLUSH)) {
- struct blk_plug plug;
-
- mutex_lock(&sbi->flush_lock);
-
- blk_start_plug(&plug);
- f2fs_sync_dirty_inodes(sbi, FILE_INODE);
- blk_finish_plug(&plug);
+ if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
+ f2fs_available_free_memory(sbi, INO_ENTRIES))
+ return;
- mutex_unlock(&sbi->flush_lock);
- }
- f2fs_sync_fs(sbi->sb, true);
- stat_inc_bg_cp_count(sbi->stat_info);
+do_sync:
+ if (test_opt(sbi, DATA_FLUSH) && from_bg) {
+ struct blk_plug plug;
+
+ mutex_lock(&sbi->flush_lock);
+
+ blk_start_plug(&plug);
+ f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
+ blk_finish_plug(&plug);
+
+ mutex_unlock(&sbi->flush_lock);
}
+ f2fs_sync_fs(sbi->sb, 1);
+ stat_inc_bg_cp_count(sbi->stat_info);
}
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
struct block_device *bdev)
{
- struct bio *bio;
- int ret;
-
- bio = f2fs_bio_alloc(sbi, 0, false);
- if (!bio)
- return -ENOMEM;
-
- bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
- bio_set_dev(bio, bdev);
- ret = submit_bio_wait(bio);
- bio_put(bio);
+ int ret = blkdev_issue_flush(bdev);
trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
test_opt(sbi, FLUSH_MERGE), ret);
@@ -601,8 +520,6 @@ repeat:
if (kthread_should_stop())
return 0;
- sb_start_intwrite(sbi->sb);
-
if (!llist_empty(&fcc->issue_list)) {
struct flush_cmd *cmd, *next;
int ret;
@@ -623,8 +540,6 @@ repeat:
fcc->dispatch_list = NULL;
}
- sb_end_intwrite(sbi->sb);
-
wait_event_interruptible(*q,
kthread_should_stop() || !llist_empty(&fcc->issue_list));
goto repeat;
@@ -661,7 +576,11 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
llist_add(&cmd.llnode, &fcc->issue_list);
- /* update issue_list before we wake up issue_flush thread */
+ /*
+ * update issue_list before we wake up issue_flush thread, this
+ * smp_mb() pairs with another barrier in ___wait_event(), see
+ * more details in comments of waitqueue_active().
+ */
smp_mb();
if (waitqueue_active(&fcc->flush_wait_queue))
@@ -726,7 +645,7 @@ init_thread:
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
- kvfree(fcc);
+ kfree(fcc);
SM_I(sbi)->fcc_info = NULL;
return err;
}
@@ -745,7 +664,7 @@ void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
kthread_stop(flush_thread);
}
if (free) {
- kvfree(fcc);
+ kfree(fcc);
SM_I(sbi)->fcc_info = NULL;
}
}
@@ -757,12 +676,26 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
if (!f2fs_is_multi_device(sbi))
return 0;
+ if (test_opt(sbi, NOBARRIER))
+ return 0;
+
for (i = 1; i < sbi->s_ndevs; i++) {
+ int count = DEFAULT_RETRY_IO_COUNT;
+
if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
continue;
- ret = __submit_flush_wait(sbi, FDEV(i).bdev);
- if (ret)
+
+ do {
+ ret = __submit_flush_wait(sbi, FDEV(i).bdev);
+ if (ret)
+ f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
+ } while (ret && --count);
+
+ if (ret) {
+ f2fs_stop_checkpoint(sbi, false,
+ STOP_CP_REASON_FLUSH_FAIL);
break;
+ }
spin_lock(&sbi->dev_lock);
f2fs_clear_bit(i, (char *)&sbi->dirty_device);
@@ -794,6 +727,18 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
}
if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]++;
+
+ if (__is_large_section(sbi)) {
+ unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+ block_t valid_blocks =
+ get_valid_blocks(sbi, segno, true);
+
+ f2fs_bug_on(sbi, unlikely(!valid_blocks ||
+ valid_blocks == CAP_BLKS_PER_SEC(sbi)));
+
+ if (!IS_CURSEC(sbi, secno))
+ set_bit(secno, dirty_i->dirty_secmap);
+ }
}
}
@@ -801,6 +746,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ block_t valid_blocks;
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
dirty_i->nr_dirty[dirty_type]--;
@@ -812,13 +758,26 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]--;
- if (get_valid_blocks(sbi, segno, true) == 0) {
+ valid_blocks = get_valid_blocks(sbi, segno, true);
+ if (valid_blocks == 0) {
clear_bit(GET_SEC_FROM_SEG(sbi, segno),
dirty_i->victim_secmap);
#ifdef CONFIG_F2FS_CHECK_FS
clear_bit(segno, SIT_I(sbi)->invalid_segmap);
#endif
}
+ if (__is_large_section(sbi)) {
+ unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
+
+ if (!valid_blocks ||
+ valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
+ clear_bit(secno, dirty_i->dirty_secmap);
+ return;
+ }
+
+ if (!IS_CURSEC(sbi, secno))
+ set_bit(secno, dirty_i->dirty_secmap);
+ }
}
}
@@ -831,20 +790,22 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned short valid_blocks, ckpt_valid_blocks;
+ unsigned int usable_blocks;
if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
return;
+ usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
mutex_lock(&dirty_i->seglist_lock);
valid_blocks = get_valid_blocks(sbi, segno, false);
- ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
+ ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
- ckpt_valid_blocks == sbi->blocks_per_seg)) {
+ ckpt_valid_blocks == usable_blocks)) {
__locate_dirty_segment(sbi, segno, PRE);
__remove_dirty_segment(sbi, segno, DIRTY);
- } else if (valid_blocks < sbi->blocks_per_seg) {
+ } else if (valid_blocks < usable_blocks) {
__locate_dirty_segment(sbi, segno, DIRTY);
} else {
/* Recovery routine with SSR needs this */
@@ -887,9 +848,11 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
se = get_seg_entry(sbi, segno);
if (IS_NODESEG(se->type))
- holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
+ holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
+ se->valid_blocks;
else
- holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
+ holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
+ se->valid_blocks;
}
mutex_unlock(&dirty_i->seglist_lock);
@@ -921,7 +884,7 @@ static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
if (get_valid_blocks(sbi, segno, false))
continue;
- if (get_ckpt_valid_blocks(sbi, segno))
+ if (get_ckpt_valid_blocks(sbi, segno, false))
continue;
mutex_unlock(&dirty_i->seglist_lock);
return segno;
@@ -942,7 +905,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
pend_list = &dcc->pend_list[plist_idx(len)];
- dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
+ dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
INIT_LIST_HEAD(&dc->list);
dc->bdev = bdev;
dc->lstart = lstart;
@@ -1027,9 +990,9 @@ static void f2fs_submit_discard_endio(struct bio *bio)
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
unsigned long flags;
- dc->error = blk_status_to_errno(bio->bi_status);
-
spin_lock_irqsave(&dc->lock, flags);
+ if (!dc->error)
+ dc->error = blk_status_to_errno(bio->bi_status);
dc->bio_ref--;
if (!dc->bio_ref && dc->state == D_SUBMIT) {
dc->state = D_DONE;
@@ -1070,39 +1033,43 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
int discard_type, unsigned int granularity)
{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
/* common policy */
dpolicy->type = discard_type;
dpolicy->sync = true;
dpolicy->ordered = false;
dpolicy->granularity = granularity;
- dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+ dpolicy->max_requests = dcc->max_discard_request;
dpolicy->io_aware_gran = MAX_PLIST_NUM;
- dpolicy->timeout = 0;
+ dpolicy->timeout = false;
if (discard_type == DPOLICY_BG) {
- dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
- dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
- dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+ dpolicy->min_interval = dcc->min_discard_issue_time;
+ dpolicy->mid_interval = dcc->mid_discard_issue_time;
+ dpolicy->max_interval = dcc->max_discard_issue_time;
dpolicy->io_aware = true;
dpolicy->sync = false;
dpolicy->ordered = true;
if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
dpolicy->granularity = 1;
- dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ if (atomic_read(&dcc->discard_cmd_cnt))
+ dpolicy->max_interval =
+ dcc->min_discard_issue_time;
}
} else if (discard_type == DPOLICY_FORCE) {
- dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
- dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
- dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+ dpolicy->min_interval = dcc->min_discard_issue_time;
+ dpolicy->mid_interval = dcc->mid_discard_issue_time;
+ dpolicy->max_interval = dcc->max_discard_issue_time;
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_FSTRIM) {
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_UMOUNT) {
- dpolicy->max_requests = UINT_MAX;
dpolicy->io_aware = false;
/* we need to issue all to keep CP_TRIMMED_FLAG */
dpolicy->granularity = 1;
+ dpolicy->timeout = true;
}
}
@@ -1116,13 +1083,12 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
unsigned int *issued)
{
struct block_device *bdev = dc->bdev;
- struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_discard_blocks =
- SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
+ SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
&(dcc->fstrim_list) : &(dcc->wait_list);
- int flag = dpolicy->sync ? REQ_SYNC : 0;
+ blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
block_t lstart, start, len, total_len;
int err = 0;
@@ -1165,7 +1131,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
err = __blkdev_issue_discard(bdev,
SECTOR_FROM_BLOCK(start),
SECTOR_FROM_BLOCK(len),
- GFP_NOFS, 0, &bio);
+ GFP_NOFS, &bio);
submit:
if (err) {
spin_lock_irqsave(&dc->lock, flags);
@@ -1204,7 +1170,7 @@ submit:
atomic_inc(&dcc->issued_discard);
- f2fs_update_iostat(sbi, FS_DISCARD, 1);
+ f2fs_update_iostat(sbi, NULL, FS_DISCARD, 1);
lstart += len;
start += len;
@@ -1212,12 +1178,14 @@ submit:
len = total_len;
}
- if (!err && len)
+ if (!err && len) {
+ dcc->undiscard_blks -= len;
__update_discard_tree_range(sbi, bdev, lstart, start, len);
+ }
return err;
}
-static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
+static void __insert_discard_tree(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len,
struct rb_node **insert_p,
@@ -1226,7 +1194,6 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct rb_node **p;
struct rb_node *parent = NULL;
- struct discard_cmd *dc = NULL;
bool leftmost = true;
if (insert_p && insert_parent) {
@@ -1238,12 +1205,8 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
lstart, &leftmost);
do_insert:
- dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
+ __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
p, leftmost);
- if (!dc)
- return NULL;
-
- return dc;
}
static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
@@ -1298,9 +1261,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct discard_info di = {0};
struct rb_node **insert_p = NULL, *insert_parent = NULL;
- struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_discard_blocks =
- SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
+ SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
block_t end = lstart + len;
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
@@ -1460,6 +1422,8 @@ next:
return issued;
}
+static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy);
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
@@ -1468,15 +1432,17 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
- int i, issued = 0;
+ int i, issued;
bool io_interrupted = false;
- if (dpolicy->timeout != 0)
- f2fs_update_time(sbi, dpolicy->timeout);
+ if (dpolicy->timeout)
+ f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
+retry:
+ issued = 0;
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
- if (dpolicy->timeout != 0 &&
- f2fs_time_over(sbi, dpolicy->timeout))
+ if (dpolicy->timeout &&
+ f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
break;
if (i + 1 < dpolicy->granularity)
@@ -1492,13 +1458,13 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
goto next;
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root));
+ &dcc->root, false));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
- if (dpolicy->timeout != 0 &&
- f2fs_time_over(sbi, dpolicy->timeout))
+ if (dpolicy->timeout &&
+ f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
break;
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
@@ -1520,6 +1486,11 @@ next:
break;
}
+ if (dpolicy->type == DPOLICY_UMOUNT && issued) {
+ __wait_all_discard_cmd(sbi, dpolicy);
+ goto retry;
+ }
+
if (!issued && io_interrupted)
issued = -1;
@@ -1580,33 +1551,32 @@ static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
&(dcc->fstrim_list) : &(dcc->wait_list);
- struct discard_cmd *dc, *tmp;
- bool need_wait;
+ struct discard_cmd *dc = NULL, *iter, *tmp;
unsigned int trimmed = 0;
next:
- need_wait = false;
+ dc = NULL;
mutex_lock(&dcc->cmd_lock);
- list_for_each_entry_safe(dc, tmp, wait_list, list) {
- if (dc->lstart + dc->len <= start || end <= dc->lstart)
+ list_for_each_entry_safe(iter, tmp, wait_list, list) {
+ if (iter->lstart + iter->len <= start || end <= iter->lstart)
continue;
- if (dc->len < dpolicy->granularity)
+ if (iter->len < dpolicy->granularity)
continue;
- if (dc->state == D_DONE && !dc->ref) {
- wait_for_completion_io(&dc->wait);
- if (!dc->error)
- trimmed += dc->len;
- __remove_discard_cmd(sbi, dc);
+ if (iter->state == D_DONE && !iter->ref) {
+ wait_for_completion_io(&iter->wait);
+ if (!iter->error)
+ trimmed += iter->len;
+ __remove_discard_cmd(sbi, iter);
} else {
- dc->ref++;
- need_wait = true;
+ iter->ref++;
+ dc = iter;
break;
}
}
mutex_unlock(&dcc->cmd_lock);
- if (need_wait) {
+ if (dc) {
trimmed += __wait_one_discard_bio(sbi, dc);
goto next;
}
@@ -1677,7 +1647,6 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
dcc->discard_granularity);
- dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
__issue_discard_cmd(sbi, &dpolicy);
dropped = __drop_discard_cmd(sbi);
@@ -1694,14 +1663,21 @@ static int issue_discard_thread(void *data)
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
wait_queue_head_t *q = &dcc->discard_wait_queue;
struct discard_policy dpolicy;
- unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
+ unsigned int wait_ms = dcc->min_discard_issue_time;
int issued;
set_freezable();
do {
- __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
- dcc->discard_granularity);
+ if (sbi->gc_mode == GC_URGENT_HIGH ||
+ !f2fs_available_free_memory(sbi, DISCARD_CACHE))
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+ else
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
+ dcc->discard_granularity);
+
+ if (!atomic_read(&dcc->discard_cmd_cnt))
+ wait_ms = dpolicy.max_interval;
wait_event_interruptible_timeout(*q,
kthread_should_stop() || freezing(current) ||
@@ -1725,9 +1701,8 @@ static int issue_discard_thread(void *data)
wait_ms = dpolicy.max_interval;
continue;
}
-
- if (sbi->gc_mode == GC_URGENT)
- __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+ if (!atomic_read(&dcc->discard_cmd_cnt))
+ continue;
sb_start_intwrite(sbi->sb);
@@ -1735,7 +1710,7 @@ static int issue_discard_thread(void *data)
if (issued > 0) {
__wait_all_discard_cmd(sbi, &dpolicy);
wait_ms = dpolicy.min_interval;
- } else if (issued == -1){
+ } else if (issued == -1) {
wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
if (!wait_ms)
wait_ms = dpolicy.mid_interval;
@@ -1830,7 +1805,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
offset = GET_BLKOFF_FROM_SEG0(sbi, i);
- if (!f2fs_test_and_set_bit(offset, se->discard_map))
+ if (f2fs_block_unit_discard(sbi) &&
+ !f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
}
@@ -1855,7 +1831,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
int i;
- if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
+ if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
+ !f2fs_block_unit_discard(sbi))
return false;
if (!force) {
@@ -1886,7 +1863,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
if (!de) {
de = f2fs_kmem_cache_alloc(discard_entry_slab,
- GFP_F2FS_ZERO);
+ GFP_F2FS_ZERO, true, NULL);
de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
list_add_tail(&de->list, head);
}
@@ -1925,7 +1902,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
mutex_lock(&dirty_i->seglist_lock);
for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
- __set_test_and_free(sbi, segno);
+ __set_test_and_free(sbi, segno, false);
mutex_unlock(&dirty_i->seglist_lock);
}
@@ -1940,14 +1917,18 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
- bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
+ bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
+ DISCARD_UNIT_SECTION;
+
+ if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
+ section_alignment = true;
mutex_lock(&dirty_i->seglist_lock);
while (1) {
int i;
- if (need_align && end != -1)
+ if (section_alignment && end != -1)
end--;
start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
if (start >= MAIN_SEGS(sbi))
@@ -1955,7 +1936,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
start + 1);
- if (need_align) {
+ if (section_alignment) {
start = rounddown(start, sbi->segs_per_sec);
end = roundup(end, sbi->segs_per_sec);
}
@@ -1972,7 +1953,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
(end - 1) <= cpc->trim_end)
continue;
- if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
+ if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
(end - start) << sbi->log_blocks_per_seg);
continue;
@@ -1993,6 +1974,9 @@ next:
}
mutex_unlock(&dirty_i->seglist_lock);
+ if (!f2fs_block_unit_discard(sbi))
+ goto wakeup;
+
/* send small discards */
list_for_each_entry_safe(entry, this, head, list) {
unsigned int cur_pos = 0, next_pos, len, total_len = 0;
@@ -2026,12 +2010,29 @@ skip:
dcc->nr_discards -= total_len;
}
+wakeup:
wake_up_discard_thread(sbi, false);
}
-static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
+int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
{
dev_t dev = sbi->sb->s_bdev->bd_dev;
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ int err = 0;
+
+ if (!f2fs_realtime_discard_enable(sbi))
+ return 0;
+
+ dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
+ "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
+ if (IS_ERR(dcc->f2fs_issue_discard))
+ err = PTR_ERR(dcc->f2fs_issue_discard);
+
+ return err;
+}
+
+static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
+{
struct discard_cmd_control *dcc;
int err = 0, i;
@@ -2045,6 +2046,11 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
return -ENOMEM;
dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
+ if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
+ dcc->discard_granularity = sbi->blocks_per_seg;
+ else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
+ dcc->discard_granularity = BLKS_PER_SEC(sbi);
+
INIT_LIST_HEAD(&dcc->entry_list);
for (i = 0; i < MAX_PLIST_NUM; i++)
INIT_LIST_HEAD(&dcc->pend_list[i]);
@@ -2056,6 +2062,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
atomic_set(&dcc->discard_cmd_cnt, 0);
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
+ dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
+ dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
+ dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
+ dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
dcc->undiscard_blks = 0;
dcc->next_pos = 0;
dcc->root = RB_ROOT_CACHED;
@@ -2064,13 +2074,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
init_waitqueue_head(&dcc->discard_wait_queue);
SM_I(sbi)->dcc_info = dcc;
init_thread:
- dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
- "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
- if (IS_ERR(dcc->f2fs_issue_discard)) {
- err = PTR_ERR(dcc->f2fs_issue_discard);
- kvfree(dcc);
+ err = f2fs_start_discard_thread(sbi);
+ if (err) {
+ kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
- return err;
}
return err;
@@ -2092,7 +2099,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
f2fs_issue_discard_timeout(sbi);
- kvfree(dcc);
+ kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
@@ -2112,11 +2119,45 @@ static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
unsigned int segno, int modified)
{
struct seg_entry *se = get_seg_entry(sbi, segno);
+
se->type = type;
if (modified)
__mark_sit_entry_dirty(sbi, segno);
}
+static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ unsigned int segno = GET_SEGNO(sbi, blkaddr);
+
+ if (segno == NULL_SEGNO)
+ return 0;
+ return get_seg_entry(sbi, segno)->mtime;
+}
+
+static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
+ unsigned long long old_mtime)
+{
+ struct seg_entry *se;
+ unsigned int segno = GET_SEGNO(sbi, blkaddr);
+ unsigned long long ctime = get_mtime(sbi, false);
+ unsigned long long mtime = old_mtime ? old_mtime : ctime;
+
+ if (segno == NULL_SEGNO)
+ return;
+
+ se = get_seg_entry(sbi, segno);
+
+ if (!se->mtime)
+ se->mtime = mtime;
+ else
+ se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
+ se->valid_blocks + 1);
+
+ if (ctime > SIT_I(sbi)->max_mtime)
+ SIT_I(sbi)->max_mtime = ctime;
+}
+
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
struct seg_entry *se;
@@ -2133,13 +2174,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
new_vblocks = se->valid_blocks + del;
offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
- f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
- (new_vblocks > sbi->blocks_per_seg)));
+ f2fs_bug_on(sbi, (new_vblocks < 0 ||
+ (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
se->valid_blocks = new_vblocks;
- se->mtime = get_mtime(sbi, false);
- if (se->mtime > SIT_I(sbi)->max_mtime)
- SIT_I(sbi)->max_mtime = se->mtime;
/* Update valid block bitmap */
if (del > 0) {
@@ -2161,7 +2199,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
del = 0;
}
- if (!f2fs_test_and_set_bit(offset, se->discard_map))
+ if (f2fs_block_unit_discard(sbi) &&
+ !f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
/*
@@ -2203,7 +2242,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
}
}
- if (f2fs_test_and_clear_bit(offset, se->discard_map))
+ if (f2fs_block_unit_discard(sbi) &&
+ f2fs_test_and_clear_bit(offset, se->discard_map))
sbi->discard_blks++;
}
if (!f2fs_test_bit(offset, se->ckpt_valid_map))
@@ -2228,10 +2268,12 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
return;
invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
+ f2fs_invalidate_compress_page(sbi, addr);
/* add it into sit main buffer */
down_write(&sit_i->sentry_lock);
+ update_segment_mtime(sbi, addr, 0);
update_sit_entry(sbi, addr, -1);
/* add it into dirty seglist */
@@ -2272,6 +2314,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
void *addr = curseg->sum_blk;
+
addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
memcpy(addr, sum, sizeof(struct f2fs_summary));
}
@@ -2311,7 +2354,9 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
*/
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
- return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
+ if (unlikely(f2fs_cp_error(sbi)))
+ return ERR_PTR(-EIO);
+ return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
}
void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
@@ -2356,9 +2401,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
f2fs_put_page(page, 1);
}
-static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
+static int is_next_segment_free(struct f2fs_sb_info *sbi,
+ struct curseg_info *curseg, int type)
{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno = curseg->segno + 1;
struct free_segmap_info *free_i = FREE_I(sbi);
@@ -2396,8 +2441,8 @@ find_other_zone:
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
if (secno >= MAIN_SECS(sbi)) {
if (dir == ALLOC_RIGHT) {
- secno = find_next_zero_bit(free_i->free_secmap,
- MAIN_SECS(sbi), 0);
+ secno = find_first_zero_bit(free_i->free_secmap,
+ MAIN_SECS(sbi));
f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
} else {
go_left = 1;
@@ -2412,8 +2457,8 @@ find_other_zone:
left_start--;
continue;
}
- left_start = find_next_zero_bit(free_i->free_secmap,
- MAIN_SECS(sbi), 0);
+ left_start = find_first_zero_bit(free_i->free_secmap,
+ MAIN_SECS(sbi));
f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
break;
}
@@ -2462,7 +2507,9 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
struct summary_footer *sum_footer;
+ unsigned short seg_type = curseg->seg_type;
+ curseg->inited = true;
curseg->segno = curseg->next_segno;
curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
curseg->next_blkoff = 0;
@@ -2470,24 +2517,38 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
sum_footer = &(curseg->sum_blk->footer);
memset(sum_footer, 0, sizeof(struct summary_footer));
- if (IS_DATASEG(type))
+
+ sanity_check_seg_type(sbi, seg_type);
+
+ if (IS_DATASEG(seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
- if (IS_NODESEG(type))
+ if (IS_NODESEG(seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
- __set_sit_entry_type(sbi, type, curseg->segno, modified);
+ __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
}
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned short seg_type = curseg->seg_type;
+
+ sanity_check_seg_type(sbi, seg_type);
+ if (f2fs_need_rand_seg(sbi))
+ return prandom_u32_max(MAIN_SECS(sbi) * sbi->segs_per_sec);
+
/* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi))
- return CURSEG_I(sbi, type)->segno;
+ return curseg->segno;
+
+ /* inmem log may not be located on any segment after mount */
+ if (!curseg->inited)
+ return 0;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return 0;
if (test_opt(sbi, NOHEAP) &&
- (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
+ (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
return 0;
if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
@@ -2497,7 +2558,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
return 0;
- return CURSEG_I(sbi, type)->segno;
+ return curseg->segno;
}
/*
@@ -2507,12 +2568,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned short seg_type = curseg->seg_type;
unsigned int segno = curseg->segno;
int dir = ALLOC_LEFT;
- write_sum_page(sbi, curseg->sum_blk,
+ if (curseg->inited)
+ write_sum_page(sbi, curseg->sum_blk,
GET_SUM_BLOCK(sbi, segno));
- if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
+ if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
dir = ALLOC_RIGHT;
if (test_opt(sbi, NOHEAP))
@@ -2523,24 +2586,25 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
curseg->next_segno = segno;
reset_curseg(sbi, type, 1);
curseg->alloc_type = LFS;
+ if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
+ curseg->fragment_remained_chunk =
+ prandom_u32_max(sbi->max_fragment_chunk) + 1;
}
-static void __next_free_blkoff(struct f2fs_sb_info *sbi,
- struct curseg_info *seg, block_t start)
+static int __next_free_blkoff(struct f2fs_sb_info *sbi,
+ int segno, block_t start)
{
- struct seg_entry *se = get_seg_entry(sbi, seg->segno);
+ struct seg_entry *se = get_seg_entry(sbi, segno);
int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
unsigned long *target_map = SIT_I(sbi)->tmp_map;
unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
- int i, pos;
+ int i;
for (i = 0; i < entries; i++)
target_map[i] = ckpt_map[i] | cur_map[i];
- pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
-
- seg->next_blkoff = pos;
+ return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
}
/*
@@ -2551,17 +2615,34 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi,
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
struct curseg_info *seg)
{
- if (seg->alloc_type == SSR)
- __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
- else
+ if (seg->alloc_type == SSR) {
+ seg->next_blkoff =
+ __next_free_blkoff(sbi, seg->segno,
+ seg->next_blkoff + 1);
+ } else {
seg->next_blkoff++;
+ if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) {
+ /* To allocate block chunks in different sizes, use a random number */
+ if (--seg->fragment_remained_chunk <= 0) {
+ seg->fragment_remained_chunk =
+ prandom_u32_max(sbi->max_fragment_chunk) + 1;
+ seg->next_blkoff +=
+ prandom_u32_max(sbi->max_fragment_hole) + 1;
+ }
+ }
+ }
+}
+
+bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
+{
+ return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
}
/*
* This function always allocates a used segment(from dirty seglist) by SSR
* manner, so it should recover the existing segment information of valid blocks
*/
-static void change_curseg(struct f2fs_sb_info *sbi, int type)
+static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2569,8 +2650,10 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
struct f2fs_summary_block *sum_node;
struct page *sum_page;
- write_sum_page(sbi, curseg->sum_blk,
- GET_SUM_BLOCK(sbi, curseg->segno));
+ if (flush)
+ write_sum_page(sbi, curseg->sum_blk,
+ GET_SUM_BLOCK(sbi, curseg->segno));
+
__set_test_and_inuse(sbi, new_segno);
mutex_lock(&dirty_i->seglist_lock);
@@ -2580,32 +2663,142 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
reset_curseg(sbi, type, 1);
curseg->alloc_type = SSR;
- __next_free_blkoff(sbi, curseg, 0);
+ curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
sum_page = f2fs_get_sum_page(sbi, new_segno);
- f2fs_bug_on(sbi, IS_ERR(sum_page));
+ if (IS_ERR(sum_page)) {
+ /* GC won't be able to use stale summary pages due to cp_error */
+ memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
+ return;
+ }
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
f2fs_put_page(sum_page, 1);
}
-static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+ int alloc_mode, unsigned long long age);
+
+static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
+ int target_type, int alloc_mode,
+ unsigned long long age)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ curseg->seg_type = target_type;
+
+ if (get_ssr_segment(sbi, type, alloc_mode, age)) {
+ struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
+
+ curseg->seg_type = se->type;
+ change_curseg(sbi, type, true);
+ } else {
+ /* allocate cold segment by default */
+ curseg->seg_type = CURSEG_COLD_DATA;
+ new_curseg(sbi, type, true);
+ }
+ stat_inc_seg_type(sbi, curseg);
+}
+
+static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
+
+ if (!sbi->am.atgc_enabled)
+ return;
+
+ f2fs_down_read(&SM_I(sbi)->curseg_lock);
+
+ mutex_lock(&curseg->curseg_mutex);
+ down_write(&SIT_I(sbi)->sentry_lock);
+
+ get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
+
+ up_write(&SIT_I(sbi)->sentry_lock);
+ mutex_unlock(&curseg->curseg_mutex);
+
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
+
+}
+void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_init_atgc_curseg(sbi);
+}
+
+static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+ if (!curseg->inited)
+ goto out;
+
+ if (get_valid_blocks(sbi, curseg->segno, false)) {
+ write_sum_page(sbi, curseg->sum_blk,
+ GET_SUM_BLOCK(sbi, curseg->segno));
+ } else {
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+ __set_test_and_free(sbi, curseg->segno, true);
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+ }
+out:
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+ if (sbi->am.atgc_enabled)
+ __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+ if (!curseg->inited)
+ goto out;
+ if (get_valid_blocks(sbi, curseg->segno, false))
+ goto out;
+
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+ __set_test_and_inuse(sbi, curseg->segno);
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+out:
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+ if (sbi->am.atgc_enabled)
+ __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+ int alloc_mode, unsigned long long age)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
unsigned segno = NULL_SEGNO;
+ unsigned short seg_type = curseg->seg_type;
int i, cnt;
bool reversed = false;
+ sanity_check_seg_type(sbi, seg_type);
+
/* f2fs_need_SSR() already forces to do this */
- if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
+ if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
curseg->next_segno = segno;
return 1;
}
/* For node segments, let's do SSR more intensively */
- if (IS_NODESEG(type)) {
- if (type >= CURSEG_WARM_NODE) {
+ if (IS_NODESEG(seg_type)) {
+ if (seg_type >= CURSEG_WARM_NODE) {
reversed = true;
i = CURSEG_COLD_NODE;
} else {
@@ -2613,7 +2806,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
}
cnt = NR_CURSEG_NODE_TYPE;
} else {
- if (type >= CURSEG_WARM_DATA) {
+ if (seg_type >= CURSEG_WARM_DATA) {
reversed = true;
i = CURSEG_COLD_DATA;
} else {
@@ -2623,9 +2816,9 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
}
for (; cnt-- > 0; reversed ? i-- : i++) {
- if (i == type)
+ if (i == seg_type)
continue;
- if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
+ if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
curseg->next_segno = segno;
return 1;
}
@@ -2654,26 +2847,28 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
if (force)
new_curseg(sbi, type, true);
else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
- type == CURSEG_WARM_NODE)
+ curseg->seg_type == CURSEG_WARM_NODE)
new_curseg(sbi, type, false);
- else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+ else if (curseg->alloc_type == LFS &&
+ is_next_segment_free(sbi, curseg, type) &&
likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
new_curseg(sbi, type, false);
- else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
- change_curseg(sbi, type);
+ else if (f2fs_need_SSR(sbi) &&
+ get_ssr_segment(sbi, type, SSR, 0))
+ change_curseg(sbi, type, true);
else
new_curseg(sbi, type, false);
stat_inc_seg_type(sbi, curseg);
}
-void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno;
- down_read(&SM_I(sbi)->curseg_lock);
+ f2fs_down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&SIT_I(sbi)->sentry_lock);
@@ -2681,8 +2876,8 @@ void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
if (segno < start || segno > end)
goto unlock;
- if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
- change_curseg(sbi, type);
+ if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
+ change_curseg(sbi, type, true);
else
new_curseg(sbi, type, true);
@@ -2697,32 +2892,55 @@ unlock:
type, segno, curseg->segno);
mutex_unlock(&curseg->curseg_mutex);
- up_read(&SM_I(sbi)->curseg_lock);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
}
-void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type)
+static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
+ bool new_sec, bool force)
{
- struct curseg_info *curseg;
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int old_segno;
- int i;
- down_write(&SIT_I(sbi)->sentry_lock);
+ if (!curseg->inited)
+ goto alloc;
- for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
- if (type != NO_CHECK_TYPE && i != type)
- continue;
+ if (force || curseg->next_blkoff ||
+ get_valid_blocks(sbi, curseg->segno, new_sec))
+ goto alloc;
- curseg = CURSEG_I(sbi, i);
- if (type == NO_CHECK_TYPE || curseg->next_blkoff ||
- get_valid_blocks(sbi, curseg->segno, false) ||
- get_ckpt_valid_blocks(sbi, curseg->segno)) {
- old_segno = curseg->segno;
- SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
- locate_dirty_segment(sbi, old_segno);
- }
- }
+ if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
+ return;
+alloc:
+ old_segno = curseg->segno;
+ SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
+ locate_dirty_segment(sbi, old_segno);
+}
+
+static void __allocate_new_section(struct f2fs_sb_info *sbi,
+ int type, bool force)
+{
+ __allocate_new_segment(sbi, type, true, force);
+}
+
+void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
+{
+ f2fs_down_read(&SM_I(sbi)->curseg_lock);
+ down_write(&SIT_I(sbi)->sentry_lock);
+ __allocate_new_section(sbi, type, force);
+ up_write(&SIT_I(sbi)->sentry_lock);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
+}
+void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ f2fs_down_read(&SM_I(sbi)->curseg_lock);
+ down_write(&SIT_I(sbi)->sentry_lock);
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
+ __allocate_new_segment(sbi, i, false, false);
up_write(&SIT_I(sbi)->sentry_lock);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
}
static const struct segment_allocation default_salloc_ops = {
@@ -2766,7 +2984,7 @@ next:
mutex_lock(&dcc->cmd_lock);
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root));
+ &dcc->root, false));
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, start,
@@ -2801,7 +3019,7 @@ next:
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
trimmed += __wait_all_discard_cmd(sbi, NULL);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
goto next;
}
skip:
@@ -2830,7 +3048,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
- bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
+ bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
@@ -2860,9 +3078,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
if (sbi->discard_blks == 0)
goto out;
- down_write(&sbi->gc_lock);
+ f2fs_down_write(&sbi->gc_lock);
err = f2fs_write_checkpoint(sbi, &cpc);
- up_write(&sbi->gc_lock);
+ f2fs_up_write(&sbi->gc_lock);
if (err)
goto out;
@@ -2890,12 +3108,11 @@ out:
return err;
}
-static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
+static bool __has_curseg_space(struct f2fs_sb_info *sbi,
+ struct curseg_info *curseg)
{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
- if (curseg->next_blkoff < sbi->blocks_per_seg)
- return true;
- return false;
+ return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi,
+ curseg->segno);
}
int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
@@ -2910,101 +3127,6 @@ int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
}
}
-/* This returns write hints for each segment type. This hints will be
- * passed down to block layer. There are mapping tables which depend on
- * the mount option 'whint_mode'.
- *
- * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
- *
- * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
- *
- * User F2FS Block
- * ---- ---- -----
- * META WRITE_LIFE_NOT_SET
- * HOT_NODE "
- * WARM_NODE "
- * COLD_NODE "
- * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
- * extension list " "
- *
- * -- buffered io
- * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
- * WRITE_LIFE_NONE " "
- * WRITE_LIFE_MEDIUM " "
- * WRITE_LIFE_LONG " "
- *
- * -- direct io
- * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
- * WRITE_LIFE_NONE " WRITE_LIFE_NONE
- * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
- * WRITE_LIFE_LONG " WRITE_LIFE_LONG
- *
- * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
- *
- * User F2FS Block
- * ---- ---- -----
- * META WRITE_LIFE_MEDIUM;
- * HOT_NODE WRITE_LIFE_NOT_SET
- * WARM_NODE "
- * COLD_NODE WRITE_LIFE_NONE
- * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
- * extension list " "
- *
- * -- buffered io
- * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG
- * WRITE_LIFE_NONE " "
- * WRITE_LIFE_MEDIUM " "
- * WRITE_LIFE_LONG " "
- *
- * -- direct io
- * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
- * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
- * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
- * WRITE_LIFE_NONE " WRITE_LIFE_NONE
- * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
- * WRITE_LIFE_LONG " WRITE_LIFE_LONG
- */
-
-enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
- enum page_type type, enum temp_type temp)
-{
- if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
- if (type == DATA) {
- if (temp == WARM)
- return WRITE_LIFE_NOT_SET;
- else if (temp == HOT)
- return WRITE_LIFE_SHORT;
- else if (temp == COLD)
- return WRITE_LIFE_EXTREME;
- } else {
- return WRITE_LIFE_NOT_SET;
- }
- } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
- if (type == DATA) {
- if (temp == WARM)
- return WRITE_LIFE_LONG;
- else if (temp == HOT)
- return WRITE_LIFE_SHORT;
- else if (temp == COLD)
- return WRITE_LIFE_EXTREME;
- } else if (type == NODE) {
- if (temp == WARM || temp == HOT)
- return WRITE_LIFE_NOT_SET;
- else if (temp == COLD)
- return WRITE_LIFE_NONE;
- } else if (type == META) {
- return WRITE_LIFE_MEDIUM;
- }
- }
- return WRITE_LIFE_NOT_SET;
-}
-
static int __get_segment_type_2(struct f2fs_io_info *fio)
{
if (fio->type == DATA)
@@ -3035,13 +3157,22 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (fio->type == DATA) {
struct inode *inode = fio->page->mapping->host;
- if (is_cold_data(fio->page) || file_is_cold(inode) ||
- f2fs_compressed_file(inode))
+ if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
+ return CURSEG_COLD_DATA_PINNED;
+
+ if (page_private_gcing(fio->page)) {
+ if (fio->sbi->am.atgc_enabled &&
+ (fio->io_type == FS_DATA_IO) &&
+ (fio->sbi->gc_mode != GC_URGENT_HIGH))
+ return CURSEG_ALL_DATA_ATGC;
+ else
+ return CURSEG_COLD_DATA;
+ }
+ if (file_is_cold(inode) || f2fs_need_compress_data(inode))
return CURSEG_COLD_DATA;
if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) ||
- f2fs_is_atomic_file(inode) ||
- f2fs_is_volatile_file(inode))
+ f2fs_is_cow_file(inode))
return CURSEG_HOT_DATA;
return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
} else {
@@ -3082,31 +3213,29 @@ static int __get_segment_type(struct f2fs_io_info *fio)
void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
- struct f2fs_io_info *fio, bool add_list)
+ struct f2fs_io_info *fio)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
- bool put_pin_sem = false;
-
- if (type == CURSEG_COLD_DATA) {
- /* GC during CURSEG_COLD_DATA_PINNED allocation */
- if (down_read_trylock(&sbi->pin_sem)) {
- put_pin_sem = true;
- } else {
- type = CURSEG_WARM_DATA;
- curseg = CURSEG_I(sbi, type);
- }
- } else if (type == CURSEG_COLD_DATA_PINNED) {
- type = CURSEG_COLD_DATA;
- }
+ unsigned long long old_mtime;
+ bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
+ struct seg_entry *se = NULL;
- down_read(&SM_I(sbi)->curseg_lock);
+ f2fs_down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&sit_i->sentry_lock);
+ if (from_gc) {
+ f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
+ se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
+ sanity_check_seg_type(sbi, se->type);
+ f2fs_bug_on(sbi, IS_NODESEG(se->type));
+ }
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+ f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
+
f2fs_wait_discard_bio(sbi, *new_blkaddr);
/*
@@ -3120,6 +3249,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
stat_inc_block_count(sbi, curseg);
+ if (from_gc) {
+ old_mtime = get_segment_mtime(sbi, old_blkaddr);
+ } else {
+ update_segment_mtime(sbi, old_blkaddr, 0);
+ old_mtime = 0;
+ }
+ update_segment_mtime(sbi, *new_blkaddr, old_mtime);
+
/*
* SIT information should be updated before segment allocation,
* since SSR needs latest valid block information.
@@ -3128,9 +3265,13 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
update_sit_entry(sbi, old_blkaddr, -1);
- if (!__has_curseg_space(sbi, type))
- sit_i->s_ops->allocate_segment(sbi, type, false);
-
+ if (!__has_curseg_space(sbi, curseg)) {
+ if (from_gc)
+ get_atssr_segment(sbi, type, se->type,
+ AT_SSR, se->mtime);
+ else
+ sit_i->s_ops->allocate_segment(sbi, type, false);
+ }
/*
* segment dirty status should be updated after segment allocation,
* so we just need to update status only one time after previous
@@ -3147,12 +3288,12 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
f2fs_inode_chksum_set(sbi, page);
}
- if (F2FS_IO_ALIGNED(sbi))
- fio->retry = false;
-
- if (add_list) {
+ if (fio) {
struct f2fs_bio_info *io;
+ if (F2FS_IO_ALIGNED(sbi))
+ fio->retry = false;
+
INIT_LIST_HEAD(&fio->list);
fio->in_list = true;
io = sbi->write_io[fio->type] + fio->temp;
@@ -3163,46 +3304,51 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
mutex_unlock(&curseg->curseg_mutex);
- up_read(&SM_I(sbi)->curseg_lock);
-
- if (put_pin_sem)
- up_read(&sbi->pin_sem);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
}
-static void update_device_state(struct f2fs_io_info *fio)
+void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
+ block_t blkaddr, unsigned int blkcnt)
{
- struct f2fs_sb_info *sbi = fio->sbi;
- unsigned int devidx;
-
if (!f2fs_is_multi_device(sbi))
return;
- devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
+ while (1) {
+ unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
+ unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
- /* update device state for fsync */
- f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
+ /* update device state for fsync */
+ f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
- /* update device state for checkpoint */
- if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
- spin_lock(&sbi->dev_lock);
- f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
- spin_unlock(&sbi->dev_lock);
+ /* update device state for checkpoint */
+ if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
+ spin_lock(&sbi->dev_lock);
+ f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
+ spin_unlock(&sbi->dev_lock);
+ }
+
+ if (blkcnt <= blks)
+ break;
+ blkcnt -= blks;
+ blkaddr += blks;
}
}
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
int type = __get_segment_type(fio);
- bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
+ bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
if (keep_order)
- down_read(&fio->sbi->io_order_lock);
+ f2fs_down_read(&fio->sbi->io_order_lock);
reallocate:
f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
- &fio->new_blkaddr, sum, type, fio, true);
- if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
+ &fio->new_blkaddr, sum, type, fio);
+ if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) {
invalidate_mapping_pages(META_MAPPING(fio->sbi),
fio->old_blkaddr, fio->old_blkaddr);
+ f2fs_invalidate_compress_page(fio->sbi, fio->old_blkaddr);
+ }
/* writeout dirty page into bdev */
f2fs_submit_page_write(fio);
@@ -3211,10 +3357,10 @@ reallocate:
goto reallocate;
}
- update_device_state(fio);
+ f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
if (keep_order)
- up_read(&fio->sbi->io_order_lock);
+ f2fs_up_read(&fio->sbi->io_order_lock);
}
void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -3241,7 +3387,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
f2fs_submit_page_write(&fio);
stat_inc_meta_count(sbi, page->index);
- f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
}
void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
@@ -3251,7 +3397,7 @@ void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
set_summary(&sum, nid, 0, 0);
do_write_page(&sum, fio);
- f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+ f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
}
void f2fs_outplace_write_data(struct dnode_of_data *dn,
@@ -3265,7 +3411,7 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn,
do_write_page(&sum, fio);
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
- f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
+ f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
}
int f2fs_inplace_write_data(struct f2fs_io_info *fio)
@@ -3284,9 +3430,20 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
__func__, segno);
- return -EFSCORRUPTED;
+ err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
+ goto drop_bio;
+ }
+
+ if (f2fs_cp_error(sbi)) {
+ err = -EIO;
+ goto drop_bio;
}
+ if (fio->post_read)
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ fio->new_blkaddr, fio->new_blkaddr);
+
stat_inc_inplace_blocks(fio->sbi);
if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE)))
@@ -3294,11 +3451,22 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
else
err = f2fs_submit_page_bio(fio);
if (!err) {
- update_device_state(fio);
- f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
+ f2fs_update_device_state(fio->sbi, fio->ino,
+ fio->new_blkaddr, 1);
+ f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
+ fio->io_type, F2FS_BLKSIZE);
}
return err;
+drop_bio:
+ if (fio->bio && *(fio->bio)) {
+ struct bio *bio = *(fio->bio);
+
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ *(fio->bio) = NULL;
+ }
+ return err;
}
static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
@@ -3315,7 +3483,8 @@ static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
- bool recover_curseg, bool recover_newaddr)
+ bool recover_curseg, bool recover_newaddr,
+ bool from_gc)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
@@ -3323,12 +3492,13 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
struct seg_entry *se;
int type;
unsigned short old_blkoff;
+ unsigned char old_alloc_type;
segno = GET_SEGNO(sbi, new_blkaddr);
se = get_seg_entry(sbi, segno);
type = se->type;
- down_write(&SM_I(sbi)->curseg_lock);
+ f2fs_down_write(&SM_I(sbi)->curseg_lock);
if (!recover_curseg) {
/* for recovery flow */
@@ -3356,21 +3526,28 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
old_cursegno = curseg->segno;
old_blkoff = curseg->next_blkoff;
+ old_alloc_type = curseg->alloc_type;
/* change the current segment */
if (segno != curseg->segno) {
curseg->next_segno = segno;
- change_curseg(sbi, type);
+ change_curseg(sbi, type, true);
}
curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
__add_sum_entry(sbi, type, sum);
- if (!recover_curseg || recover_newaddr)
+ if (!recover_curseg || recover_newaddr) {
+ if (!from_gc)
+ update_segment_mtime(sbi, new_blkaddr, 0);
update_sit_entry(sbi, new_blkaddr, 1);
+ }
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
+ f2fs_invalidate_compress_page(sbi, old_blkaddr);
+ if (!from_gc)
+ update_segment_mtime(sbi, old_blkaddr, 0);
update_sit_entry(sbi, old_blkaddr, -1);
}
@@ -3382,14 +3559,15 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (recover_curseg) {
if (old_cursegno != curseg->segno) {
curseg->next_segno = old_cursegno;
- change_curseg(sbi, type);
+ change_curseg(sbi, type, true);
}
curseg->next_blkoff = old_blkoff;
+ curseg->alloc_type = old_alloc_type;
}
up_write(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
- up_write(&SM_I(sbi)->curseg_lock);
+ f2fs_up_write(&SM_I(sbi)->curseg_lock);
}
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
@@ -3402,7 +3580,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
set_summary(&sum, dn->nid, dn->ofs_in_node, version);
f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
- recover_curseg, recover_newaddr);
+ recover_curseg, recover_newaddr, false);
f2fs_update_data_blkaddr(dn, new_addr);
}
@@ -3447,10 +3625,16 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
block_t len)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
block_t i;
+ if (!f2fs_post_read_required(inode))
+ return;
+
for (i = 0; i < len; i++)
f2fs_wait_on_block_writeback(inode, blkaddr + i);
+
+ invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
}
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
@@ -3496,6 +3680,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
for (j = 0; j < blk_off; j++) {
struct f2fs_summary *s;
+
s = (struct f2fs_summary *)(kaddr + offset);
seg_i->sum_blk->entries[j] = *s;
offset += SUMMARY_SIZE;
@@ -3534,7 +3719,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
CURSEG_HOT_DATA]);
if (__exist_node_summaries(sbi))
- blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
+ blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
else
blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
} else {
@@ -3558,6 +3743,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
if (__exist_node_summaries(sbi)) {
struct f2fs_summary *ns = &sum->entries[0];
int i;
+
for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
ns->version = 0;
ns->ofs_in_node = 0;
@@ -3612,8 +3798,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
}
if (__exist_node_summaries(sbi))
- f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
- NR_CURSEG_TYPE - type, META_CP, true);
+ f2fs_ra_meta_pages(sbi,
+ sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
+ NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
for (; type <= CURSEG_COLD_NODE; type++) {
err = read_normal_summaries(sbi, type);
@@ -3624,7 +3811,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
/* sanity check for summary blocks */
if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
- f2fs_err(sbi, "invalid journal entries nats %u sits %u\n",
+ f2fs_err(sbi, "invalid journal entries nats %u sits %u",
nats_in_cursum(nat_j), sits_in_cursum(sit_j));
return -EINVAL;
}
@@ -3658,6 +3845,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
/* Step 3: write summary entries */
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
unsigned short blkoff;
+
seg_i = CURSEG_I(sbi, i);
if (sbi->ckpt->alloc_type[i] == SSR)
blkoff = sbi->blocks_per_seg;
@@ -3694,6 +3882,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
int i, end;
+
if (IS_DATASEG(type))
end = type + NR_CURSEG_DATA_TYPE;
else
@@ -3741,7 +3930,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
- return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
+ return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
@@ -3766,7 +3955,8 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
static struct sit_entry_set *grab_sit_entry_set(void)
{
struct sit_entry_set *ses =
- f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
+ f2fs_kmem_cache_alloc(sit_entry_set_slab,
+ GFP_NOFS, true, NULL);
ses->entry_cnt = 0;
INIT_LIST_HEAD(&ses->set_list);
@@ -3788,10 +3978,12 @@ static void adjust_sit_entry_set(struct sit_entry_set *ses,
return;
list_for_each_entry_continue(next, head, set_list)
- if (ses->entry_cnt <= next->entry_cnt)
- break;
+ if (ses->entry_cnt <= next->entry_cnt) {
+ list_move_tail(&ses->set_list, &next->set_list);
+ return;
+ }
- list_move_tail(&ses->set_list, &next->set_list);
+ list_move_tail(&ses->set_list, head);
}
static void add_sit_entry(unsigned int segno, struct list_head *head)
@@ -3977,6 +4169,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
unsigned int sit_segs, start;
char *src_bitmap, *bitmap;
unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
+ unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
/* allocate memory for SIT information */
sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
@@ -3999,9 +4192,9 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
return -ENOMEM;
#ifdef CONFIG_F2FS_CHECK_FS
- bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
+ bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
#else
- bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
+ bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
#endif
sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
if (!sit_i->bitmap)
@@ -4021,8 +4214,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
bitmap += SIT_VBLOCK_MAP_SIZE;
#endif
- sit_i->sentries[start].discard_map = bitmap;
- bitmap += SIT_VBLOCK_MAP_SIZE;
+ if (discard_map) {
+ sit_i->sentries[start].discard_map = bitmap;
+ bitmap += SIT_VBLOCK_MAP_SIZE;
+ }
}
sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
@@ -4071,7 +4266,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
sit_i->dirty_sentries = 0;
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
- sit_i->mounted_time = ktime_get_real_seconds();
+ sit_i->mounted_time = ktime_get_boottime_seconds();
init_rwsem(&sit_i->sentry_lock);
return 0;
}
@@ -4115,14 +4310,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
struct curseg_info *array;
int i;
- array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
- GFP_KERNEL);
+ array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
+ sizeof(*array)), GFP_KERNEL);
if (!array)
return -ENOMEM;
SM_I(sbi)->curseg_array = array;
- for (i = 0; i < NR_CURSEG_TYPE; i++) {
+ for (i = 0; i < NO_CHECK_TYPE; i++) {
mutex_init(&array[i].curseg_mutex);
array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
if (!array[i].sum_blk)
@@ -4132,8 +4327,15 @@ static int build_curseg(struct f2fs_sb_info *sbi)
sizeof(struct f2fs_journal), GFP_KERNEL);
if (!array[i].journal)
return -ENOMEM;
+ if (i < NR_PERSISTENT_LOG)
+ array[i].seg_type = CURSEG_HOT_DATA + i;
+ else if (i == CURSEG_COLD_DATA_PINNED)
+ array[i].seg_type = CURSEG_COLD_DATA;
+ else if (i == CURSEG_ALL_DATA_ATGC)
+ array[i].seg_type = CURSEG_COLD_DATA;
array[i].segno = NULL_SEGNO;
array[i].next_blkoff = 0;
+ array[i].inited = false;
}
return restore_curseg_summaries(sbi);
}
@@ -4149,10 +4351,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
int err = 0;
- block_t total_node_blocks = 0;
+ block_t sit_valid_blocks[2] = {0, 0};
do {
- readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
+ readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
META_SIT, true);
start = start_blk * sit_i->sents_per_block;
@@ -4174,20 +4376,30 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
if (err)
return err;
seg_info_from_raw_sit(se, &sit);
- if (IS_NODESEG(se->type))
- total_node_blocks += se->valid_blocks;
- /* build discard map only one time */
- if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
- memset(se->discard_map, 0xff,
- SIT_VBLOCK_MAP_SIZE);
- } else {
- memcpy(se->discard_map,
- se->cur_valid_map,
- SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks +=
- sbi->blocks_per_seg -
- se->valid_blocks;
+ if (se->type >= NR_PERSISTENT_LOG) {
+ f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
+ se->type, start);
+ f2fs_handle_error(sbi,
+ ERROR_INCONSISTENT_SUM_TYPE);
+ return -EFSCORRUPTED;
+ }
+
+ sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
+
+ if (f2fs_block_unit_discard(sbi)) {
+ /* build discard map only one time */
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+ memset(se->discard_map, 0xff,
+ SIT_VBLOCK_MAP_SIZE);
+ } else {
+ memcpy(se->discard_map,
+ se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks +=
+ sbi->blocks_per_seg -
+ se->valid_blocks;
+ }
}
if (__is_large_section(sbi))
@@ -4206,6 +4418,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
f2fs_err(sbi, "Wrong journal entry on segno %u",
start);
err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
break;
}
@@ -4213,23 +4426,33 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
sit = sit_in_journal(journal, i);
old_valid_blocks = se->valid_blocks;
- if (IS_NODESEG(se->type))
- total_node_blocks -= old_valid_blocks;
+
+ sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
err = check_block_count(sbi, start, &sit);
if (err)
break;
seg_info_from_raw_sit(se, &sit);
- if (IS_NODESEG(se->type))
- total_node_blocks += se->valid_blocks;
- if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
- memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
- } else {
- memcpy(se->discard_map, se->cur_valid_map,
- SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks += old_valid_blocks;
- sbi->discard_blks -= se->valid_blocks;
+ if (se->type >= NR_PERSISTENT_LOG) {
+ f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
+ se->type, start);
+ err = -EFSCORRUPTED;
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
+ break;
+ }
+
+ sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
+
+ if (f2fs_block_unit_discard(sbi)) {
+ if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+ memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
+ } else {
+ memcpy(se->discard_map, se->cur_valid_map,
+ SIT_VBLOCK_MAP_SIZE);
+ sbi->discard_blks += old_valid_blocks;
+ sbi->discard_blks -= se->valid_blocks;
+ }
}
if (__is_large_section(sbi)) {
@@ -4241,22 +4464,38 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
}
up_read(&curseg->journal_rwsem);
- if (!err && total_node_blocks != valid_node_count(sbi)) {
+ if (err)
+ return err;
+
+ if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
- total_node_blocks, valid_node_count(sbi));
- err = -EFSCORRUPTED;
+ sit_valid_blocks[NODE], valid_node_count(sbi));
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
+ return -EFSCORRUPTED;
}
- return err;
+ if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
+ valid_user_blocks(sbi)) {
+ f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
+ sit_valid_blocks[DATA], sit_valid_blocks[NODE],
+ valid_user_blocks(sbi));
+ f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
}
static void init_free_segmap(struct f2fs_sb_info *sbi)
{
unsigned int start;
int type;
+ struct seg_entry *sentry;
for (start = 0; start < MAIN_SEGS(sbi); start++) {
- struct seg_entry *sentry = get_seg_entry(sbi, start);
+ if (f2fs_usable_blks_in_seg(sbi, start) == 0)
+ continue;
+ sentry = get_seg_entry(sbi, start);
if (!sentry->valid_blocks)
__set_free(sbi, start);
else
@@ -4267,6 +4506,7 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
/* set use the current segments */
for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
struct curseg_info *curseg_t = CURSEG_I(sbi, type);
+
__set_test_and_inuse(sbi, curseg_t->segno);
}
}
@@ -4275,8 +4515,8 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct free_segmap_info *free_i = FREE_I(sbi);
- unsigned int segno = 0, offset = 0;
- unsigned short valid_blocks;
+ unsigned int segno = 0, offset = 0, secno;
+ block_t valid_blocks, usable_blks_in_seg;
while (1) {
/* find dirty segment based on free segmap */
@@ -4285,9 +4525,10 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
break;
offset = segno + 1;
valid_blocks = get_valid_blocks(sbi, segno, false);
- if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
+ usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
+ if (valid_blocks == usable_blks_in_seg || !valid_blocks)
continue;
- if (valid_blocks > sbi->blocks_per_seg) {
+ if (valid_blocks > usable_blks_in_seg) {
f2fs_bug_on(sbi, 1);
continue;
}
@@ -4295,6 +4536,22 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
__locate_dirty_segment(sbi, segno, DIRTY);
mutex_unlock(&dirty_i->seglist_lock);
}
+
+ if (!__is_large_section(sbi))
+ return;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
+ valid_blocks = get_valid_blocks(sbi, segno, true);
+ secno = GET_SEC_FROM_SEG(sbi, segno);
+
+ if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
+ continue;
+ if (IS_CURSEC(sbi, secno))
+ continue;
+ set_bit(secno, dirty_i->dirty_secmap);
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
}
static int init_victim_secmap(struct f2fs_sb_info *sbi)
@@ -4305,6 +4562,13 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi)
dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
if (!dirty_i->victim_secmap)
return -ENOMEM;
+
+ dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
+ if (!dirty_i->pinned_secmap)
+ return -ENOMEM;
+
+ dirty_i->pinned_secmap_cnt = 0;
+ dirty_i->enable_pin_section = true;
return 0;
}
@@ -4331,6 +4595,14 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
return -ENOMEM;
}
+ if (__is_large_section(sbi)) {
+ bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
+ dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
+ bitmap_size, GFP_KERNEL);
+ if (!dirty_i->dirty_secmap)
+ return -ENOMEM;
+ }
+
init_dirty_segmap(sbi);
return init_victim_secmap(sbi);
}
@@ -4343,11 +4615,25 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
* In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
* In LFS curseg, all blkaddr after .next_blkoff should be unused.
*/
- for (i = 0; i < NO_CHECK_TYPE; i++) {
+ for (i = 0; i < NR_PERSISTENT_LOG; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
unsigned int blkofs = curseg->next_blkoff;
+ if (f2fs_sb_has_readonly(sbi) &&
+ i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
+ continue;
+
+ sanity_check_seg_type(sbi, curseg->seg_type);
+
+ if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
+ f2fs_err(sbi,
+ "Current segment has invalid alloc_type:%d",
+ curseg->alloc_type);
+ f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
+ return -EFSCORRUPTED;
+ }
+
if (f2fs_test_bit(blkofs, se->cur_valid_map))
goto out;
@@ -4362,6 +4648,7 @@ out:
"Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
i, curseg->segno, curseg->alloc_type,
curseg->next_blkoff, blkofs);
+ f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
return -EFSCORRUPTED;
}
}
@@ -4471,7 +4758,8 @@ static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
}
static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
- void *data) {
+ void *data)
+{
memcpy(data, zone, sizeof(struct blk_zone));
return 0;
}
@@ -4523,7 +4811,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
f2fs_notice(sbi, "Assign new section to curseg[%d]: "
"curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
- allocate_segment_by_default(sbi, type, true);
+
+ f2fs_allocate_new_section(sbi, type, true);
/* check consistency of the zone curseg pointed to */
if (check_zone_write_pointer(sbi, zbd, &zone))
@@ -4572,7 +4861,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
{
int i, ret;
- for (i = 0; i < NO_CHECK_TYPE; i++) {
+ for (i = 0; i < NR_PERSISTENT_LOG; i++) {
ret = fix_curseg_write_pointer(sbi, i);
if (ret)
return ret;
@@ -4587,8 +4876,10 @@ struct check_zone_write_pointer_args {
};
static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
- void *data) {
+ void *data)
+{
struct check_zone_write_pointer_args *args;
+
args = (struct check_zone_write_pointer_args *)data;
return check_zone_write_pointer(args->sbi, args->fdev, zone);
@@ -4613,6 +4904,94 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
return 0;
}
+
+static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx,
+ unsigned int dev_idx)
+{
+ if (!bdev_is_zoned(FDEV(dev_idx).bdev))
+ return true;
+ return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq);
+}
+
+/* Return the zone index in the given device */
+static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno,
+ int dev_idx)
+{
+ block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
+
+ return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >>
+ sbi->log_blocks_per_blkz;
+}
+
+/*
+ * Return the usable segments in a section based on the zone's
+ * corresponding zone capacity. Zone is equal to a section.
+ */
+static inline unsigned int f2fs_usable_zone_segs_in_sec(
+ struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ unsigned int dev_idx, zone_idx;
+
+ dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
+ zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
+
+ /* Conventional zone's capacity is always equal to zone size */
+ if (is_conv_zone(sbi, zone_idx, dev_idx))
+ return sbi->segs_per_sec;
+
+ if (!sbi->unusable_blocks_per_sec)
+ return sbi->segs_per_sec;
+
+ /* Get the segment count beyond zone capacity block */
+ return sbi->segs_per_sec - (sbi->unusable_blocks_per_sec >>
+ sbi->log_blocks_per_seg);
+}
+
+/*
+ * Return the number of usable blocks in a segment. The number of blocks
+ * returned is always equal to the number of blocks in a segment for
+ * segments fully contained within a sequential zone capacity or a
+ * conventional zone. For segments partially contained in a sequential
+ * zone capacity, the number of usable blocks up to the zone capacity
+ * is returned. 0 is returned in all other cases.
+ */
+static inline unsigned int f2fs_usable_zone_blks_in_seg(
+ struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
+ unsigned int zone_idx, dev_idx, secno;
+
+ secno = GET_SEC_FROM_SEG(sbi, segno);
+ seg_start = START_BLOCK(sbi, segno);
+ dev_idx = f2fs_target_device_index(sbi, seg_start);
+ zone_idx = get_zone_idx(sbi, secno, dev_idx);
+
+ /*
+ * Conventional zone's capacity is always equal to zone size,
+ * so, blocks per segment is unchanged.
+ */
+ if (is_conv_zone(sbi, zone_idx, dev_idx))
+ return sbi->blocks_per_seg;
+
+ if (!sbi->unusable_blocks_per_sec)
+ return sbi->blocks_per_seg;
+
+ sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
+ sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
+
+ /*
+ * If segment starts before zone capacity and spans beyond
+ * zone capacity, then usable blocks are from seg start to
+ * zone capacity. If the segment starts after the zone capacity,
+ * then there are no usable blocks.
+ */
+ if (seg_start >= sec_cap_blkaddr)
+ return 0;
+ if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
+ return sec_cap_blkaddr - seg_start;
+
+ return sbi->blocks_per_seg;
+}
#else
int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
{
@@ -4623,7 +5002,36 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
{
return 0;
}
+
+static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ return 0;
+}
+
+static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ return 0;
+}
#endif
+unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ if (f2fs_sb_has_blkzoned(sbi))
+ return f2fs_usable_zone_blks_in_seg(sbi, segno);
+
+ return sbi->blocks_per_seg;
+}
+
+unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ if (f2fs_sb_has_blkzoned(sbi))
+ return f2fs_usable_zone_segs_in_sec(sbi, segno);
+
+ return sbi->segs_per_sec;
+}
/*
* Update min, max modified time for cost-benefit GC algorithm
@@ -4650,6 +5058,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
sit_i->min_mtime = mtime;
}
sit_i->max_mtime = get_mtime(sbi, false);
+ sit_i->dirty_max_mtime = 0;
up_write(&sit_i->sentry_lock);
}
@@ -4678,17 +5087,17 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
- if (!test_opt(sbi, LFS))
+ if (!f2fs_lfs_mode(sbi))
sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
- sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
+ sm_info->min_seq_blocks = sbi->blocks_per_seg;
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
sm_info->min_ssr_sections = reserved_sections(sbi);
INIT_LIST_HEAD(&sm_info->sit_entry_set);
- init_rwsem(&sm_info->curseg_lock);
+ init_f2fs_rwsem(&sm_info->curseg_lock);
if (!f2fs_readonly(sbi->sb)) {
err = f2fs_create_flush_cmd_control(sbi);
@@ -4742,6 +5151,8 @@ static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+ kvfree(dirty_i->pinned_secmap);
kvfree(dirty_i->victim_secmap);
}
@@ -4757,9 +5168,15 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_DIRTY_TYPE; i++)
discard_dirty_segmap(sbi, i);
+ if (__is_large_section(sbi)) {
+ mutex_lock(&dirty_i->seglist_lock);
+ kvfree(dirty_i->dirty_secmap);
+ mutex_unlock(&dirty_i->seglist_lock);
+ }
+
destroy_victim_secmap(sbi);
SM_I(sbi)->dirty_info = NULL;
- kvfree(dirty_i);
+ kfree(dirty_i);
}
static void destroy_curseg(struct f2fs_sb_info *sbi)
@@ -4771,21 +5188,22 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
return;
SM_I(sbi)->curseg_array = NULL;
for (i = 0; i < NR_CURSEG_TYPE; i++) {
- kvfree(array[i].sum_blk);
- kvfree(array[i].journal);
+ kfree(array[i].sum_blk);
+ kfree(array[i].journal);
}
- kvfree(array);
+ kfree(array);
}
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
{
struct free_segmap_info *free_i = SM_I(sbi)->free_info;
+
if (!free_i)
return;
SM_I(sbi)->free_info = NULL;
kvfree(free_i->free_segmap);
kvfree(free_i->free_secmap);
- kvfree(free_i);
+ kfree(free_i);
}
static void destroy_sit_info(struct f2fs_sb_info *sbi)
@@ -4797,7 +5215,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
if (sit_i->sentries)
kvfree(sit_i->bitmap);
- kvfree(sit_i->tmp_map);
+ kfree(sit_i->tmp_map);
kvfree(sit_i->sentries);
kvfree(sit_i->sec_entries);
@@ -4809,7 +5227,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
kvfree(sit_i->sit_bitmap_mir);
kvfree(sit_i->invalid_segmap);
#endif
- kvfree(sit_i);
+ kfree(sit_i);
}
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
@@ -4825,29 +5243,29 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
destroy_free_segmap(sbi);
destroy_sit_info(sbi);
sbi->sm_info = NULL;
- kvfree(sm_info);
+ kfree(sm_info);
}
int __init f2fs_create_segment_manager_caches(void)
{
- discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
+ discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
sizeof(struct discard_entry));
if (!discard_entry_slab)
goto fail;
- discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
+ discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
sizeof(struct discard_cmd));
if (!discard_cmd_slab)
goto destroy_discard_entry;
- sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
+ sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
sizeof(struct sit_entry_set));
if (!sit_entry_set_slab)
goto destroy_discard_cmd;
- inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
- sizeof(struct inmem_pages));
- if (!inmem_entry_slab)
+ revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
+ sizeof(struct revoke_entry));
+ if (!revoke_entry_slab)
goto destroy_sit_entry_set;
return 0;
@@ -4866,5 +5284,5 @@ void f2fs_destroy_segment_manager_caches(void)
kmem_cache_destroy(sit_entry_set_slab);
kmem_cache_destroy(discard_cmd_slab);
kmem_cache_destroy(discard_entry_slab);
- kmem_cache_destroy(inmem_entry_slab);
+ kmem_cache_destroy(revoke_entry_slab);
}