From d2859751cd0bf586941ffa7308635a293f943c17 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:40:44 +1100 Subject: [XFS] remove old vmap cache XFS's vmap batching simply defers a number (up to 64) of vunmaps, and keeps track of them in a list. To purge the batch, it just goes through the list and calls vunamp on each one. This is pretty poor: a global TLB flush is generally still performed on each vunmap, with the most expensive parts of the operation being the broadcast IPIs and locking involved in the SMP callouts, and the locking involved in the vmap management -- none of these are avoided by just batching up the calls. I'm actually surprised it ever made much difference. (Now that the lazy vmap allocator is upstream, this description is not quite right, but the vunmap batching still doesn't seem to do much) Rip all this logic out of XFS completely. I will improve vmap performance and scalability directly in subsequent patch. Signed-off-by: Nick Piggin Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_buf.c | 75 +--------------------------------------------- 1 file changed, 1 insertion(+), 74 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index cb329edc925b..0b2177a9fbdc 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -165,75 +165,6 @@ test_page_region( return (mask && (page_private(page) & mask) == mask); } -/* - * Mapping of multi-page buffers into contiguous virtual space - */ - -typedef struct a_list { - void *vm_addr; - struct a_list *next; -} a_list_t; - -static a_list_t *as_free_head; -static int as_list_len; -static DEFINE_SPINLOCK(as_lock); - -/* - * Try to batch vunmaps because they are costly. - */ -STATIC void -free_address( - void *addr) -{ - a_list_t *aentry; - -#ifdef CONFIG_XEN - /* - * Xen needs to be able to make sure it can get an exclusive - * RO mapping of pages it wants to turn into a pagetable. If - * a newly allocated page is also still being vmap()ed by xfs, - * it will cause pagetable construction to fail. This is a - * quick workaround to always eagerly unmap pages so that Xen - * is happy. - */ - vunmap(addr); - return; -#endif - - aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT); - if (likely(aentry)) { - spin_lock(&as_lock); - aentry->next = as_free_head; - aentry->vm_addr = addr; - as_free_head = aentry; - as_list_len++; - spin_unlock(&as_lock); - } else { - vunmap(addr); - } -} - -STATIC void -purge_addresses(void) -{ - a_list_t *aentry, *old; - - if (as_free_head == NULL) - return; - - spin_lock(&as_lock); - aentry = as_free_head; - as_free_head = NULL; - as_list_len = 0; - spin_unlock(&as_lock); - - while ((old = aentry) != NULL) { - vunmap(aentry->vm_addr); - aentry = aentry->next; - kfree(old); - } -} - /* * Internal xfs_buf_t object manipulation */ @@ -333,7 +264,7 @@ xfs_buf_free( uint i; if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) - free_address(bp->b_addr - bp->b_offset); + vunmap(bp->b_addr - bp->b_offset); for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; @@ -455,8 +386,6 @@ _xfs_buf_map_pages( bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; bp->b_flags |= XBF_MAPPED; } else if (flags & XBF_MAPPED) { - if (as_list_len > 64) - purge_addresses(); bp->b_addr = vmap(bp->b_pages, bp->b_page_count, VM_MAP, PAGE_KERNEL); if (unlikely(bp->b_addr == NULL)) @@ -1743,8 +1672,6 @@ xfsbufd( count++; } - if (as_list_len > 0) - purge_addresses(); if (count) blk_run_address_space(target->bt_mapping); -- cgit v1.2.3-59-g8ed1b From 95f8e302c04c0b0c6de35ab399a5551605eeb006 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:43:09 +1100 Subject: [XFS] use scalable vmap API Implement XFS's large buffer support with the new vmap APIs. See the vmap rewrite (db64fe02) for some numbers. The biggest improvement that comes from using the new APIs is avoiding the global KVA allocation lock on every call. Signed-off-by: Nick Piggin Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_buf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 0b2177a9fbdc..d71dc44e21ed 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -264,7 +264,7 @@ xfs_buf_free( uint i; if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) - vunmap(bp->b_addr - bp->b_offset); + vm_unmap_ram(bp->b_addr - bp->b_offset, bp->b_page_count); for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; @@ -386,8 +386,8 @@ _xfs_buf_map_pages( bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; bp->b_flags |= XBF_MAPPED; } else if (flags & XBF_MAPPED) { - bp->b_addr = vmap(bp->b_pages, bp->b_page_count, - VM_MAP, PAGE_KERNEL); + bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, + -1, PAGE_KERNEL); if (unlikely(bp->b_addr == NULL)) return -ENOMEM; bp->b_addr += bp->b_offset; -- cgit v1.2.3-59-g8ed1b From cf7dab801796b9ee52a6dc99888a66bf476538ec Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Wed, 18 Feb 2009 15:41:28 -0600 Subject: Revert "[XFS] use scalable vmap API" This reverts commit 95f8e302c04c0b0c6de35ab399a5551605eeb006. This commit caused regression. We'll try to fix use of new vmap API for next release. Signed-off-by: Christoph Hellwig Signed-off-by: Felix Blyakher --- fs/xfs/linux-2.6/xfs_buf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index d71dc44e21ed..0b2177a9fbdc 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -264,7 +264,7 @@ xfs_buf_free( uint i; if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) - vm_unmap_ram(bp->b_addr - bp->b_offset, bp->b_page_count); + vunmap(bp->b_addr - bp->b_offset); for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; @@ -386,8 +386,8 @@ _xfs_buf_map_pages( bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; bp->b_flags |= XBF_MAPPED; } else if (flags & XBF_MAPPED) { - bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, - -1, PAGE_KERNEL); + bp->b_addr = vmap(bp->b_pages, bp->b_page_count, + VM_MAP, PAGE_KERNEL); if (unlikely(bp->b_addr == NULL)) return -ENOMEM; bp->b_addr += bp->b_offset; -- cgit v1.2.3-59-g8ed1b From 3a011a171906a3a51a43bb860fb7c66a64cab140 Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Wed, 18 Feb 2009 15:56:51 -0600 Subject: Revert "[XFS] remove old vmap cache" This reverts commit d2859751cd0bf586941ffa7308635a293f943c17. This commit caused regression. We'll try to fix use of new vmap API for next release. Signed-off-by: Christoph Hellwig Signed-off-by: Felix Blyakher --- fs/xfs/linux-2.6/xfs_buf.c | 75 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 0b2177a9fbdc..cb329edc925b 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -165,6 +165,75 @@ test_page_region( return (mask && (page_private(page) & mask) == mask); } +/* + * Mapping of multi-page buffers into contiguous virtual space + */ + +typedef struct a_list { + void *vm_addr; + struct a_list *next; +} a_list_t; + +static a_list_t *as_free_head; +static int as_list_len; +static DEFINE_SPINLOCK(as_lock); + +/* + * Try to batch vunmaps because they are costly. + */ +STATIC void +free_address( + void *addr) +{ + a_list_t *aentry; + +#ifdef CONFIG_XEN + /* + * Xen needs to be able to make sure it can get an exclusive + * RO mapping of pages it wants to turn into a pagetable. If + * a newly allocated page is also still being vmap()ed by xfs, + * it will cause pagetable construction to fail. This is a + * quick workaround to always eagerly unmap pages so that Xen + * is happy. + */ + vunmap(addr); + return; +#endif + + aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT); + if (likely(aentry)) { + spin_lock(&as_lock); + aentry->next = as_free_head; + aentry->vm_addr = addr; + as_free_head = aentry; + as_list_len++; + spin_unlock(&as_lock); + } else { + vunmap(addr); + } +} + +STATIC void +purge_addresses(void) +{ + a_list_t *aentry, *old; + + if (as_free_head == NULL) + return; + + spin_lock(&as_lock); + aentry = as_free_head; + as_free_head = NULL; + as_list_len = 0; + spin_unlock(&as_lock); + + while ((old = aentry) != NULL) { + vunmap(aentry->vm_addr); + aentry = aentry->next; + kfree(old); + } +} + /* * Internal xfs_buf_t object manipulation */ @@ -264,7 +333,7 @@ xfs_buf_free( uint i; if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) - vunmap(bp->b_addr - bp->b_offset); + free_address(bp->b_addr - bp->b_offset); for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; @@ -386,6 +455,8 @@ _xfs_buf_map_pages( bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; bp->b_flags |= XBF_MAPPED; } else if (flags & XBF_MAPPED) { + if (as_list_len > 64) + purge_addresses(); bp->b_addr = vmap(bp->b_pages, bp->b_page_count, VM_MAP, PAGE_KERNEL); if (unlikely(bp->b_addr == NULL)) @@ -1672,6 +1743,8 @@ xfsbufd( count++; } + if (as_list_len > 0) + purge_addresses(); if (count) blk_run_address_space(target->bt_mapping); -- cgit v1.2.3-59-g8ed1b From b79631330a653f568a2ac4eb4a32474c80e3fe77 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Mar 2009 14:48:37 -0500 Subject: xfs: only issues a cache flush on unmount if barriers are enabled Currently we unconditionally issue a flush from xfs_free_buftarg, but since 2.6.29-rc1 this gives a warning in the style of end_request: I/O error, dev vdb, sector 0 Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Signed-off-by: Felix Blyakher --- fs/xfs/linux-2.6/xfs_buf.c | 12 ++++++++++-- fs/xfs/linux-2.6/xfs_buf.h | 2 +- fs/xfs/linux-2.6/xfs_super.c | 10 +++++----- 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index cb329edc925b..aa1016bb9134 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -34,6 +34,12 @@ #include #include +#include "xfs_sb.h" +#include "xfs_inum.h" +#include "xfs_ag.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" + static kmem_zone_t *xfs_buf_zone; STATIC int xfsbufd(void *); STATIC int xfsbufd_wakeup(int, gfp_t); @@ -1435,10 +1441,12 @@ xfs_unregister_buftarg( void xfs_free_buftarg( - xfs_buftarg_t *btp) + struct xfs_mount *mp, + struct xfs_buftarg *btp) { xfs_flush_buftarg(btp, 1); - xfs_blkdev_issue_flush(btp); + if (mp->m_flags & XFS_MOUNT_BARRIER) + xfs_blkdev_issue_flush(btp); xfs_free_bufhash(btp); iput(btp->bt_mapping->host); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 288ae7c4c800..9b4d666ad31f 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -413,7 +413,7 @@ static inline int XFS_bwrite(xfs_buf_t *bp) * Handling of buftargs. */ extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); -extern void xfs_free_buftarg(xfs_buftarg_t *); +extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); extern int xfs_flush_buftarg(xfs_buftarg_t *, int); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index bc1e64708e2b..8483b35821e0 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -733,15 +733,15 @@ xfs_close_devices( { if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { struct block_device *logdev = mp->m_logdev_targp->bt_bdev; - xfs_free_buftarg(mp->m_logdev_targp); + xfs_free_buftarg(mp, mp->m_logdev_targp); xfs_blkdev_put(logdev); } if (mp->m_rtdev_targp) { struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; - xfs_free_buftarg(mp->m_rtdev_targp); + xfs_free_buftarg(mp, mp->m_rtdev_targp); xfs_blkdev_put(rtdev); } - xfs_free_buftarg(mp->m_ddev_targp); + xfs_free_buftarg(mp, mp->m_ddev_targp); } /* @@ -810,9 +810,9 @@ xfs_open_devices( out_free_rtdev_targ: if (mp->m_rtdev_targp) - xfs_free_buftarg(mp->m_rtdev_targp); + xfs_free_buftarg(mp, mp->m_rtdev_targp); out_free_ddev_targ: - xfs_free_buftarg(mp->m_ddev_targp); + xfs_free_buftarg(mp, mp->m_ddev_targp); out_close_rtdev: if (rtdev) xfs_blkdev_put(rtdev); -- cgit v1.2.3-59-g8ed1b From c626d174cfe38e7f0545d074c299527892cd8c45 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 6 Apr 2009 18:42:11 +0200 Subject: xfs: prevent unwritten extent conversion from blocking I/O completion Unwritten extent conversion can recurse back into the filesystem due to memory allocation. Memory reclaim requires I/O completions to be processed to allow the callers to make progress. If the I/O completion workqueue thread is doing the recursion, then we have a deadlock situation. Move unwritten extent completion into it's own workqueue so it doesn't block I/O completions for normal delayed allocation or overwrite data. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_aops.c | 38 +++++++++++++++++++++----------------- fs/xfs/linux-2.6/xfs_aops.h | 1 + fs/xfs/linux-2.6/xfs_buf.c | 9 +++++++++ 3 files changed, 31 insertions(+), 17 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c13f67300fe7..7ec89fc05b2b 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -152,23 +152,6 @@ xfs_find_bdev_for_inode( return mp->m_ddev_targp->bt_bdev; } -/* - * Schedule IO completion handling on a xfsdatad if this was - * the final hold on this ioend. If we are asked to wait, - * flush the workqueue. - */ -STATIC void -xfs_finish_ioend( - xfs_ioend_t *ioend, - int wait) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) { - queue_work(xfsdatad_workqueue, &ioend->io_work); - if (wait) - flush_workqueue(xfsdatad_workqueue); - } -} - /* * We're now finished for good with this ioend structure. * Update the page state via the associated buffer_heads, @@ -309,6 +292,27 @@ xfs_end_bio_read( xfs_destroy_ioend(ioend); } +/* + * Schedule IO completion handling on a xfsdatad if this was + * the final hold on this ioend. If we are asked to wait, + * flush the workqueue. + */ +STATIC void +xfs_finish_ioend( + xfs_ioend_t *ioend, + int wait) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) { + struct workqueue_struct *wq = xfsdatad_workqueue; + if (ioend->io_work.func == xfs_end_bio_unwritten) + wq = xfsconvertd_workqueue; + + queue_work(wq, &ioend->io_work); + if (wait) + flush_workqueue(wq); + } +} + /* * Allocate and initialise an IO completion structure. * We need to track unwritten extent write completion here initially. diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 1dd528849755..221b3e66ceef 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -19,6 +19,7 @@ #define __XFS_AOPS_H__ extern struct workqueue_struct *xfsdatad_workqueue; +extern struct workqueue_struct *xfsconvertd_workqueue; extern mempool_t *xfs_ioend_pool; /* diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index aa1016bb9134..e28800a9f2b5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -51,6 +51,7 @@ static struct shrinker xfs_buf_shake = { static struct workqueue_struct *xfslogd_workqueue; struct workqueue_struct *xfsdatad_workqueue; +struct workqueue_struct *xfsconvertd_workqueue; #ifdef XFS_BUF_TRACE void @@ -1775,6 +1776,7 @@ xfs_flush_buftarg( xfs_buf_t *bp, *n; int pincount = 0; + xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); xfs_buf_runall_queues(xfslogd_workqueue); @@ -1831,9 +1833,15 @@ xfs_buf_init(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; + xfsconvertd_workqueue = create_workqueue("xfsconvertd"); + if (!xfsconvertd_workqueue) + goto out_destroy_xfsdatad_workqueue; + register_shrinker(&xfs_buf_shake); return 0; + out_destroy_xfsdatad_workqueue: + destroy_workqueue(xfsdatad_workqueue); out_destroy_xfslogd_workqueue: destroy_workqueue(xfslogd_workqueue); out_free_buf_zone: @@ -1849,6 +1857,7 @@ void xfs_buf_terminate(void) { unregister_shrinker(&xfs_buf_shake); + destroy_workqueue(xfsconvertd_workqueue); destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); -- cgit v1.2.3-59-g8ed1b