From 10ee27a06cc8eb57f83342a8eabcb75deb872d52 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 10 Jan 2013 13:47:57 +0800 Subject: vfs: re-implement writeback_inodes_sb(_nr)_if_idle() and rename them writeback_inodes_sb(_nr)_if_idle() is re-implemented by replacing down_read() with down_read_trylock() because - If ->s_umount is write locked, then the sb is not idle. That is writeback_inodes_sb(_nr)_if_idle() needn't wait for the lock. - writeback_inodes_sb(_nr)_if_idle() grabs s_umount lock when it want to start writeback, it may bring us deadlock problem when doing umount. In order to fix the problem, ext4 and btrfs implemented their own writeback functions instead of writeback_inodes_sb(_nr)_if_idle(), but it introduced the redundant code, it is better to implement a new writeback_inodes_sb(_nr)_if_idle(). The name of these two functions is cumbersome, so rename them to try_to_writeback_inodes_sb(_nr). This idea came from Christoph Hellwig. Some code is from the patch of Kamal Mostafa. Reviewed-by: Jan Kara Signed-off-by: Miao Xie Signed-off-by: Fengguang Wu --- fs/btrfs/extent-tree.c | 20 +++----------------- fs/ext4/inode.c | 8 ++------ fs/fs-writeback.c | 44 ++++++++++++++++++++------------------------ include/linux/writeback.h | 6 +++--- 4 files changed, 28 insertions(+), 50 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 521e9d4424f6..f31abb14e06f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3689,20 +3689,6 @@ static int can_overcommit(struct btrfs_root *root, return 0; } -static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb, - unsigned long nr_pages, - enum wb_reason reason) -{ - if (!writeback_in_progress(sb->s_bdi) && - down_read_trylock(&sb->s_umount)) { - writeback_inodes_sb_nr(sb, nr_pages, reason); - up_read(&sb->s_umount); - return 1; - } - - return 0; -} - /* * shrink metadata reservation for delalloc */ @@ -3735,9 +3721,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, while (delalloc_bytes && loops < 3) { max_reclaim = min(delalloc_bytes, to_reclaim); nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; - writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb, - nr_pages, - WB_REASON_FS_FREE_SPACE); + try_to_writeback_inodes_sb_nr(root->fs_info->sb, + nr_pages, + WB_REASON_FS_FREE_SPACE); /* * We need to wait for the async pages to actually start before diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cbfe13bf5b2a..5f6eef71ff21 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2512,12 +2512,8 @@ static int ext4_nonda_switch(struct super_block *sb) /* * Start pushing delalloc when 1/2 of free blocks are dirty. */ - if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && - !writeback_in_progress(sb->s_bdi) && - down_read_trylock(&sb->s_umount)) { - writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); - up_read(&sb->s_umount); - } + if (dirty_blocks && (free_blocks < 2 * dirty_blocks)) + try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); if (2 * free_blocks < 3 * dirty_blocks || free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 310972b72a66..ad3cc46a743a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1332,47 +1332,43 @@ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) EXPORT_SYMBOL(writeback_inodes_sb); /** - * writeback_inodes_sb_if_idle - start writeback if none underway + * try_to_writeback_inodes_sb_nr - try to start writeback if none underway * @sb: the superblock - * @reason: reason why some writeback work was initiated + * @nr: the number of pages to write + * @reason: the reason of writeback * - * Invoke writeback_inodes_sb if no writeback is currently underway. + * Invoke writeback_inodes_sb_nr if no writeback is currently underway. * Returns 1 if writeback was started, 0 if not. */ -int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason) +int try_to_writeback_inodes_sb_nr(struct super_block *sb, + unsigned long nr, + enum wb_reason reason) { - if (!writeback_in_progress(sb->s_bdi)) { - down_read(&sb->s_umount); - writeback_inodes_sb(sb, reason); - up_read(&sb->s_umount); + if (writeback_in_progress(sb->s_bdi)) return 1; - } else + + if (!down_read_trylock(&sb->s_umount)) return 0; + + writeback_inodes_sb_nr(sb, nr, reason); + up_read(&sb->s_umount); + return 1; } -EXPORT_SYMBOL(writeback_inodes_sb_if_idle); +EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr); /** - * writeback_inodes_sb_nr_if_idle - start writeback if none underway + * try_to_writeback_inodes_sb - try to start writeback if none underway * @sb: the superblock - * @nr: the number of pages to write * @reason: reason why some writeback work was initiated * - * Invoke writeback_inodes_sb if no writeback is currently underway. + * Implement by try_to_writeback_inodes_sb_nr() * Returns 1 if writeback was started, 0 if not. */ -int writeback_inodes_sb_nr_if_idle(struct super_block *sb, - unsigned long nr, - enum wb_reason reason) +int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) { - if (!writeback_in_progress(sb->s_bdi)) { - down_read(&sb->s_umount); - writeback_inodes_sb_nr(sb, nr, reason); - up_read(&sb->s_umount); - return 1; - } else - return 0; + return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); } -EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); +EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b82a83aba311..9a9367c0c076 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -87,9 +87,9 @@ int inode_wait(void *); void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); -int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason); -int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr, - enum wb_reason reason); +int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); +int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, + enum wb_reason reason); void sync_inodes_sb(struct super_block *); long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason); -- cgit v1.2.3-59-g8ed1b From ed84825b785ceb932af7dd5aa08614801721320b Mon Sep 17 00:00:00 2001 From: "paul.szabo@sydney.edu.au" Date: Sun, 20 Jan 2013 11:02:10 +1100 Subject: Negative (setpoint-dirty) in bdi_position_ratio() In bdi_position_ratio(), get difference (setpoint-dirty) right even when negative. Both setpoint and dirty are unsigned long, the difference was zero-padded thus wrongly sign-extended to s64. This issue affects all 32-bit architectures, does not affect 64-bit architectures where long and s64 are equivalent. In this function, dirty is between freerun and limit, the pseudo-float x is between [-1,1], expected to be negative about half the time. With zero-padding, instead of a small negative x we obtained a large positive one so bdi_position_ratio() returned garbage. Casting the difference to s64 also prevents overflow with left-shift; though normally these numbers are small and I never observed a 32-bit overflow there. (This patch does not solve the PAE OOM issue.) Paul Szabo psz@maths.usyd.edu.au http://www.maths.usyd.edu.au/u/psz/ School of Mathematics and Statistics University of Sydney Australia Reviewed-by: Jan Kara Reported-by: Paul Szabo Reference: http://bugs.debian.org/695182 Signed-off-by: Paul Szabo Signed-off-by: Fengguang Wu --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0713bfbf0954..1534ebd6e70f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -692,7 +692,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi, * => fast response on large errors; small oscillation near setpoint */ setpoint = (freerun + limit) / 2; - x = div_s64((setpoint - dirty) << RATELIMIT_CALC_SHIFT, + x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, limit - setpoint + 1); pos_ratio = x; pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; -- cgit v1.2.3-59-g8ed1b