diff options
Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r-- | fs/xfs/xfs_buf.c | 1012 |
1 files changed, 609 insertions, 403 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 217e4f82a44a..dde346450952 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -5,22 +5,24 @@ */ #include "xfs.h" #include <linux/backing-dev.h> +#include <linux/dax.h> #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_trace.h" #include "xfs_log.h" +#include "xfs_log_recover.h" +#include "xfs_log_priv.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" #include "xfs_errortag.h" #include "xfs_error.h" +#include "xfs_ag.h" -static kmem_zone_t *xfs_buf_zone; - -#define xb_to_gfp(flags) \ - ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN) +struct kmem_cache *xfs_buf_cache; /* * Locking orders @@ -40,7 +42,7 @@ static kmem_zone_t *xfs_buf_zone; * pag_buf_lock * lru_lock * - * xfs_buftarg_wait_rele + * xfs_buftarg_drain_rele * lru_lock * b_lock (trylock due to inversion) * @@ -49,6 +51,15 @@ static kmem_zone_t *xfs_buf_zone; * b_lock (trylock due to inversion) */ +static int __xfs_buf_submit(struct xfs_buf *bp, bool wait); + +static inline int +xfs_buf_submit( + struct xfs_buf *bp) +{ + return __xfs_buf_submit(bp, !(bp->b_flags & XBF_ASYNC)); +} + static inline int xfs_buf_is_vmapped( struct xfs_buf *bp) @@ -67,7 +78,7 @@ static inline int xfs_buf_vmap_len( struct xfs_buf *bp) { - return (bp->b_page_count * PAGE_SIZE) - bp->b_offset; + return (bp->b_page_count * PAGE_SIZE); } /* @@ -76,7 +87,7 @@ xfs_buf_vmap_len( * because the corresponding decrement is deferred to buffer release. Buffers * can undergo I/O multiple times in a hold-release cycle and per buffer I/O * tracking adds unnecessary overhead. This is used for sychronization purposes - * with unmount (see xfs_wait_buftarg()), so all we really need is a count of + * with unmount (see xfs_buftarg_drain()), so all we really need is a count of * in-flight buffers. * * Buffers that are never released (e.g., superblock, iclog buffers) must set @@ -211,9 +222,7 @@ _xfs_buf_alloc( int i; *bpp = NULL; - bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS); - if (unlikely(!bp)) - return -ENOMEM; + bp = kmem_cache_zalloc(xfs_buf_cache, GFP_NOFS | __GFP_NOFAIL); /* * We don't want certain flags to appear in b_flags unless they are @@ -240,11 +249,11 @@ _xfs_buf_alloc( */ error = xfs_buf_get_maps(bp, nmaps); if (error) { - kmem_cache_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_cache, bp); return error; } - bp->b_bn = map[0].bm_bn; + bp->b_rhash_key = map[0].bm_bn; bp->b_length = 0; for (i = 0; i < nmaps; i++) { bp->b_maps[i].bm_bn = map[i].bm_bn; @@ -262,187 +271,143 @@ _xfs_buf_alloc( return 0; } -/* - * Allocate a page array capable of holding a specified number - * of pages, and point the page buf at it. - */ -STATIC int -_xfs_buf_get_pages( - xfs_buf_t *bp, - int page_count) +static void +xfs_buf_free_pages( + struct xfs_buf *bp) { - /* Make sure that we have a page list */ - if (bp->b_pages == NULL) { - bp->b_page_count = page_count; - if (page_count <= XB_PAGES) { - bp->b_pages = bp->b_page_array; - } else { - bp->b_pages = kmem_alloc(sizeof(struct page *) * - page_count, KM_NOFS); - if (bp->b_pages == NULL) - return -ENOMEM; - } - memset(bp->b_pages, 0, sizeof(struct page *) * page_count); + uint i; + + ASSERT(bp->b_flags & _XBF_PAGES); + + if (xfs_buf_is_vmapped(bp)) + vm_unmap_ram(bp->b_addr, bp->b_page_count); + + for (i = 0; i < bp->b_page_count; i++) { + if (bp->b_pages[i]) + __free_page(bp->b_pages[i]); } - return 0; + if (current->reclaim_state) + current->reclaim_state->reclaimed_slab += bp->b_page_count; + + if (bp->b_pages != bp->b_page_array) + kmem_free(bp->b_pages); + bp->b_pages = NULL; + bp->b_flags &= ~_XBF_PAGES; } -/* - * Frees b_pages if it was allocated. - */ -STATIC void -_xfs_buf_free_pages( - xfs_buf_t *bp) +static void +xfs_buf_free_callback( + struct callback_head *cb) { - if (bp->b_pages != bp->b_page_array) { - kmem_free(bp->b_pages); - bp->b_pages = NULL; - } + struct xfs_buf *bp = container_of(cb, struct xfs_buf, b_rcu); + + xfs_buf_free_maps(bp); + kmem_cache_free(xfs_buf_cache, bp); } -/* - * Releases the specified buffer. - * - * The modification state of any associated pages is left unchanged. - * The buffer must not be on any hash - use xfs_buf_rele instead for - * hashed and refcounted buffers - */ static void xfs_buf_free( - xfs_buf_t *bp) + struct xfs_buf *bp) { trace_xfs_buf_free(bp, _RET_IP_); ASSERT(list_empty(&bp->b_lru)); - if (bp->b_flags & _XBF_PAGES) { - uint i; + if (bp->b_flags & _XBF_PAGES) + xfs_buf_free_pages(bp); + else if (bp->b_flags & _XBF_KMEM) + kmem_free(bp->b_addr); - if (xfs_buf_is_vmapped(bp)) - vm_unmap_ram(bp->b_addr - bp->b_offset, - bp->b_page_count); + call_rcu(&bp->b_rcu, xfs_buf_free_callback); +} - for (i = 0; i < bp->b_page_count; i++) { - struct page *page = bp->b_pages[i]; +static int +xfs_buf_alloc_kmem( + struct xfs_buf *bp, + xfs_buf_flags_t flags) +{ + xfs_km_flags_t kmflag_mask = KM_NOFS; + size_t size = BBTOB(bp->b_length); - __free_page(page); - } - } else if (bp->b_flags & _XBF_KMEM) + /* Assure zeroed buffer for non-read cases. */ + if (!(flags & XBF_READ)) + kmflag_mask |= KM_ZERO; + + bp->b_addr = kmem_alloc(size, kmflag_mask); + if (!bp->b_addr) + return -ENOMEM; + + if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) != + ((unsigned long)bp->b_addr & PAGE_MASK)) { + /* b_addr spans two pages - use alloc_page instead */ kmem_free(bp->b_addr); - _xfs_buf_free_pages(bp); - xfs_buf_free_maps(bp); - kmem_cache_free(xfs_buf_zone, bp); + bp->b_addr = NULL; + return -ENOMEM; + } + bp->b_offset = offset_in_page(bp->b_addr); + bp->b_pages = bp->b_page_array; + bp->b_pages[0] = kmem_to_page(bp->b_addr); + bp->b_page_count = 1; + bp->b_flags |= _XBF_KMEM; + return 0; } -/* - * Allocates all the pages for buffer in question and builds it's page list. - */ -STATIC int -xfs_buf_allocate_memory( - xfs_buf_t *bp, - uint flags) -{ - size_t size; - size_t nbytes, offset; - gfp_t gfp_mask = xb_to_gfp(flags); - unsigned short page_count, i; - xfs_off_t start, end; - int error; - xfs_km_flags_t kmflag_mask = 0; +static int +xfs_buf_alloc_pages( + struct xfs_buf *bp, + xfs_buf_flags_t flags) +{ + gfp_t gfp_mask = __GFP_NOWARN; + long filled = 0; - /* - * assure zeroed buffer for non-read cases. - */ - if (!(flags & XBF_READ)) { - kmflag_mask |= KM_ZERO; - gfp_mask |= __GFP_ZERO; + if (flags & XBF_READ_AHEAD) + gfp_mask |= __GFP_NORETRY; + else + gfp_mask |= GFP_NOFS; + + /* Make sure that we have a page list */ + bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE); + if (bp->b_page_count <= XB_PAGES) { + bp->b_pages = bp->b_page_array; + } else { + bp->b_pages = kzalloc(sizeof(struct page *) * bp->b_page_count, + gfp_mask); + if (!bp->b_pages) + return -ENOMEM; } + bp->b_flags |= _XBF_PAGES; + + /* Assure zeroed buffer for non-read cases. */ + if (!(flags & XBF_READ)) + gfp_mask |= __GFP_ZERO; /* - * for buffers that are contained within a single page, just allocate - * the memory from the heap - there's no need for the complexity of - * page arrays to keep allocation down to order 0. + * Bulk filling of pages can take multiple calls. Not filling the entire + * array is not an allocation failure, so don't back off if we get at + * least one extra page. */ - size = BBTOB(bp->b_length); - if (size < PAGE_SIZE) { - int align_mask = xfs_buftarg_dma_alignment(bp->b_target); - bp->b_addr = kmem_alloc_io(size, align_mask, - KM_NOFS | kmflag_mask); - if (!bp->b_addr) { - /* low memory - use alloc_page loop instead */ - goto use_alloc_page; - } + for (;;) { + long last = filled; - if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) != - ((unsigned long)bp->b_addr & PAGE_MASK)) { - /* b_addr spans two pages - use alloc_page instead */ - kmem_free(bp->b_addr); - bp->b_addr = NULL; - goto use_alloc_page; + filled = alloc_pages_bulk_array(gfp_mask, bp->b_page_count, + bp->b_pages); + if (filled == bp->b_page_count) { + XFS_STATS_INC(bp->b_mount, xb_page_found); + break; } - bp->b_offset = offset_in_page(bp->b_addr); - bp->b_pages = bp->b_page_array; - bp->b_pages[0] = kmem_to_page(bp->b_addr); - bp->b_page_count = 1; - bp->b_flags |= _XBF_KMEM; - return 0; - } -use_alloc_page: - start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT; - end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1) - >> PAGE_SHIFT; - page_count = end - start; - error = _xfs_buf_get_pages(bp, page_count); - if (unlikely(error)) - return error; - - offset = bp->b_offset; - bp->b_flags |= _XBF_PAGES; - - for (i = 0; i < bp->b_page_count; i++) { - struct page *page; - uint retries = 0; -retry: - page = alloc_page(gfp_mask); - if (unlikely(page == NULL)) { - if (flags & XBF_READ_AHEAD) { - bp->b_page_count = i; - error = -ENOMEM; - goto out_free_pages; - } + if (filled != last) + continue; - /* - * This could deadlock. - * - * But until all the XFS lowlevel code is revamped to - * handle buffer allocation failures we can't do much. - */ - if (!(++retries % 100)) - xfs_err(NULL, - "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)", - current->comm, current->pid, - __func__, gfp_mask); - - XFS_STATS_INC(bp->b_mount, xb_page_retries); - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; + if (flags & XBF_READ_AHEAD) { + xfs_buf_free_pages(bp); + return -ENOMEM; } - XFS_STATS_INC(bp->b_mount, xb_page_found); - - nbytes = min_t(size_t, size, PAGE_SIZE - offset); - size -= nbytes; - bp->b_pages[i] = page; - offset = 0; + XFS_STATS_INC(bp->b_mount, xb_page_retries); + memalloc_retry_wait(gfp_mask); } return 0; - -out_free_pages: - for (i = 0; i < bp->b_page_count; i++) - __free_page(bp->b_pages[i]); - bp->b_flags &= ~_XBF_PAGES; - return error; } /* @@ -450,13 +415,13 @@ out_free_pages: */ STATIC int _xfs_buf_map_pages( - xfs_buf_t *bp, - uint flags) + struct xfs_buf *bp, + xfs_buf_flags_t flags) { ASSERT(bp->b_flags & _XBF_PAGES); if (bp->b_page_count == 1) { /* A single page buffer is always mappable */ - bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; + bp->b_addr = page_address(bp->b_pages[0]); } else if (flags & XBF_UNMAPPED) { bp->b_addr = NULL; } else { @@ -474,7 +439,7 @@ _xfs_buf_map_pages( nofs_flag = memalloc_nofs_save(); do { bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, - -1, PAGE_KERNEL); + -1); if (bp->b_addr) break; vm_unmap_aliases(); @@ -483,7 +448,6 @@ _xfs_buf_map_pages( if (!bp->b_addr) return -ENOMEM; - bp->b_addr += bp->b_offset; } return 0; @@ -506,7 +470,7 @@ _xfs_buf_obj_cmp( */ BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0); - if (bp->b_bn != map->bm_bn) + if (bp->b_rhash_key != map->bm_bn) return 1; if (unlikely(bp->b_length != map->bm_len)) { @@ -528,7 +492,7 @@ static const struct rhashtable_params xfs_buf_hash_params = { .min_size = 32, /* empty AGs have minimal footprint */ .nelem_hint = 16, .key_len = sizeof(xfs_daddr_t), - .key_offset = offsetof(struct xfs_buf, b_bn), + .key_offset = offsetof(struct xfs_buf, b_rhash_key), .head_offset = offsetof(struct xfs_buf, b_rhash_head), .automatic_shrinking = true, .obj_cmpfn = _xfs_buf_obj_cmp, @@ -549,100 +513,45 @@ xfs_buf_hash_destroy( rhashtable_destroy(&pag->pag_buf_hash); } -/* - * Look up a buffer in the buffer cache and return it referenced and locked - * in @found_bp. - * - * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the - * cache. - * - * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return - * -EAGAIN if we fail to lock it. - * - * Return values are: - * -EFSCORRUPTED if have been supplied with an invalid address - * -EAGAIN on trylock failure - * -ENOENT if we fail to find a match and @new_bp was NULL - * 0, with @found_bp: - * - @new_bp if we inserted it into the cache - * - the buffer we found and locked. - */ static int -xfs_buf_find( +xfs_buf_map_verify( struct xfs_buftarg *btp, - struct xfs_buf_map *map, - int nmaps, - xfs_buf_flags_t flags, - struct xfs_buf *new_bp, - struct xfs_buf **found_bp) + struct xfs_buf_map *map) { - struct xfs_perag *pag; - xfs_buf_t *bp; - struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; xfs_daddr_t eofs; - int i; - - *found_bp = NULL; - - for (i = 0; i < nmaps; i++) - cmap.bm_len += map[i].bm_len; /* Check for IOs smaller than the sector size / not sector aligned */ - ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize)); - ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask)); + ASSERT(!(BBTOB(map->bm_len) < btp->bt_meta_sectorsize)); + ASSERT(!(BBTOB(map->bm_bn) & (xfs_off_t)btp->bt_meta_sectormask)); /* * Corrupted block numbers can get through to here, unfortunately, so we * have to check that the buffer falls within the filesystem bounds. */ eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); - if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) { + if (map->bm_bn < 0 || map->bm_bn >= eofs) { xfs_alert(btp->bt_mount, "%s: daddr 0x%llx out of range, EOFS 0x%llx", - __func__, cmap.bm_bn, eofs); + __func__, map->bm_bn, eofs); WARN_ON(1); return -EFSCORRUPTED; } - - pag = xfs_perag_get(btp->bt_mount, - xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn)); - - spin_lock(&pag->pag_buf_lock); - bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap, - xfs_buf_hash_params); - if (bp) { - atomic_inc(&bp->b_hold); - goto found; - } - - /* No match found */ - if (!new_bp) { - XFS_STATS_INC(btp->bt_mount, xb_miss_locked); - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); - return -ENOENT; - } - - /* the buffer keeps the perag reference until it is freed */ - new_bp->b_pag = pag; - rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head, - xfs_buf_hash_params); - spin_unlock(&pag->pag_buf_lock); - *found_bp = new_bp; return 0; +} -found: - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); - - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) { - xfs_buf_rele(bp); - XFS_STATS_INC(btp->bt_mount, xb_busy_locked); +static int +xfs_buf_find_lock( + struct xfs_buf *bp, + xfs_buf_flags_t flags) +{ + if (flags & XBF_TRYLOCK) { + if (!xfs_buf_trylock(bp)) { + XFS_STATS_INC(bp->b_mount, xb_busy_locked); return -EAGAIN; } + } else { xfs_buf_lock(bp); - XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited); + XFS_STATS_INC(bp->b_mount, xb_get_locked_waited); } /* @@ -652,32 +561,107 @@ found: */ if (bp->b_flags & XBF_STALE) { ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); - ASSERT(bp->b_iodone == NULL); bp->b_flags &= _XBF_KMEM | _XBF_PAGES; bp->b_ops = NULL; } + return 0; +} + +static inline int +xfs_buf_lookup( + struct xfs_perag *pag, + struct xfs_buf_map *map, + xfs_buf_flags_t flags, + struct xfs_buf **bpp) +{ + struct xfs_buf *bp; + int error; + + rcu_read_lock(); + bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params); + if (!bp || !atomic_inc_not_zero(&bp->b_hold)) { + rcu_read_unlock(); + return -ENOENT; + } + rcu_read_unlock(); + + error = xfs_buf_find_lock(bp, flags); + if (error) { + xfs_buf_rele(bp); + return error; + } trace_xfs_buf_find(bp, flags, _RET_IP_); - XFS_STATS_INC(btp->bt_mount, xb_get_locked); - *found_bp = bp; + *bpp = bp; return 0; } -struct xfs_buf * -xfs_buf_incore( - struct xfs_buftarg *target, - xfs_daddr_t blkno, - size_t numblks, - xfs_buf_flags_t flags) +/* + * Insert the new_bp into the hash table. This consumes the perag reference + * taken for the lookup regardless of the result of the insert. + */ +static int +xfs_buf_find_insert( + struct xfs_buftarg *btp, + struct xfs_perag *pag, + struct xfs_buf_map *cmap, + struct xfs_buf_map *map, + int nmaps, + xfs_buf_flags_t flags, + struct xfs_buf **bpp) { + struct xfs_buf *new_bp; struct xfs_buf *bp; int error; - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - error = xfs_buf_find(target, &map, 1, flags, NULL, &bp); + error = _xfs_buf_alloc(btp, map, nmaps, flags, &new_bp); if (error) - return NULL; - return bp; + goto out_drop_pag; + + /* + * For buffers that fit entirely within a single page, first attempt to + * allocate the memory from the heap to minimise memory usage. If we + * can't get heap memory for these small buffers, we fall back to using + * the page allocator. + */ + if (BBTOB(new_bp->b_length) >= PAGE_SIZE || + xfs_buf_alloc_kmem(new_bp, flags) < 0) { + error = xfs_buf_alloc_pages(new_bp, flags); + if (error) + goto out_free_buf; + } + + spin_lock(&pag->pag_buf_lock); + bp = rhashtable_lookup_get_insert_fast(&pag->pag_buf_hash, + &new_bp->b_rhash_head, xfs_buf_hash_params); + if (IS_ERR(bp)) { + error = PTR_ERR(bp); + spin_unlock(&pag->pag_buf_lock); + goto out_free_buf; + } + if (bp) { + /* found an existing buffer */ + atomic_inc(&bp->b_hold); + spin_unlock(&pag->pag_buf_lock); + error = xfs_buf_find_lock(bp, flags); + if (error) + xfs_buf_rele(bp); + else + *bpp = bp; + goto out_free_buf; + } + + /* The new buffer keeps the perag reference until it is freed. */ + new_bp->b_pag = pag; + spin_unlock(&pag->pag_buf_lock); + *bpp = new_bp; + return 0; + +out_free_buf: + xfs_buf_free(new_bp); +out_drop_pag: + xfs_perag_put(pag); + return error; } /* @@ -687,48 +671,56 @@ xfs_buf_incore( */ int xfs_buf_get_map( - struct xfs_buftarg *target, + struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp) { - struct xfs_buf *bp; - struct xfs_buf *new_bp; - int error = 0; + struct xfs_perag *pag; + struct xfs_buf *bp = NULL; + struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; + int error; + int i; - *bpp = NULL; - error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp); - if (!error) - goto found; - if (error != -ENOENT) - return error; + for (i = 0; i < nmaps; i++) + cmap.bm_len += map[i].bm_len; - error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp); + error = xfs_buf_map_verify(btp, &cmap); if (error) return error; - error = xfs_buf_allocate_memory(new_bp, flags); - if (error) { - xfs_buf_free(new_bp); - return error; - } + pag = xfs_perag_get(btp->bt_mount, + xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn)); - error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp); - if (error) { - xfs_buf_free(new_bp); - return error; - } + error = xfs_buf_lookup(pag, &cmap, flags, &bp); + if (error && error != -ENOENT) + goto out_put_perag; + + /* cache hits always outnumber misses by at least 10:1 */ + if (unlikely(!bp)) { + XFS_STATS_INC(btp->bt_mount, xb_miss_locked); + + if (flags & XBF_INCORE) + goto out_put_perag; - if (bp != new_bp) - xfs_buf_free(new_bp); + /* xfs_buf_find_insert() consumes the perag reference. */ + error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps, + flags, &bp); + if (error) + return error; + } else { + XFS_STATS_INC(btp->bt_mount, xb_get_locked); + xfs_perag_put(pag); + } -found: + /* We do not hold a perag reference anymore. */ if (!bp->b_addr) { error = _xfs_buf_map_pages(bp, flags); if (unlikely(error)) { - xfs_warn(target->bt_mount, - "%s: failed to map pagesn", __func__); + xfs_warn_ratelimited(btp->bt_mount, + "%s: failed to map %u pages", __func__, + bp->b_page_count); xfs_buf_relse(bp); return error; } @@ -741,21 +733,25 @@ found: if (!(flags & XBF_READ)) xfs_buf_ioerror(bp, 0); - XFS_STATS_INC(target->bt_mount, xb_get); + XFS_STATS_INC(btp->bt_mount, xb_get); trace_xfs_buf_get(bp, flags, _RET_IP_); *bpp = bp; return 0; + +out_put_perag: + xfs_perag_put(pag); + return error; } -STATIC int +int _xfs_buf_read( - xfs_buf_t *bp, + struct xfs_buf *bp, xfs_buf_flags_t flags) { ASSERT(!(flags & XBF_WRITE)); ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL); - bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE); bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); return xfs_buf_submit(bp); @@ -852,7 +848,15 @@ xfs_buf_read_map( * buffer. */ if (error) { - if (!XFS_FORCED_SHUTDOWN(target->bt_mount)) + /* + * Check against log shutdown for error reporting because + * metadata writeback may require a read first and we need to + * report errors in metadata writeback until the log is shut + * down. High level transaction read functions already check + * against mount shutdown, anyway, so we only need to be + * concerned about low level IO interactions here. + */ + if (!xlog_is_shutdown(target->bt_mount->m_log)) xfs_buf_ioerror_alert(bp, fa); bp->b_flags &= ~XBF_DONE; @@ -882,9 +886,6 @@ xfs_buf_readahead_map( { struct xfs_buf *bp; - if (bdi_read_congested(target->bt_bdev->bd_bdi)) - return; - xfs_buf_read_map(target, map, nmaps, XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops, __this_address); @@ -892,14 +893,16 @@ xfs_buf_readahead_map( /* * Read an uncached buffer from disk. Allocates and returns a locked - * buffer containing the disk contents or nothing. + * buffer containing the disk contents or nothing. Uncached buffers always have + * a cache index of XFS_BUF_DADDR_NULL so we can easily determine if the buffer + * is cached or uncached during fault diagnosis. */ int xfs_buf_read_uncached( struct xfs_buftarg *target, xfs_daddr_t daddr, size_t numblks, - int flags, + xfs_buf_flags_t flags, struct xfs_buf **bpp, const struct xfs_buf_ops *ops) { @@ -914,7 +917,7 @@ xfs_buf_read_uncached( /* set up the buffer for a read IO */ ASSERT(bp->b_map_count == 1); - bp->b_bn = XFS_BUF_DADDR_NULL; /* always null for uncached buffers */ + bp->b_rhash_key = XFS_BUF_DADDR_NULL; bp->b_maps[0].bm_bn = daddr; bp->b_flags |= XBF_READ; bp->b_ops = ops; @@ -934,11 +937,10 @@ int xfs_buf_get_uncached( struct xfs_buftarg *target, size_t numblks, - int flags, + xfs_buf_flags_t flags, struct xfs_buf **bpp) { - unsigned long page_count; - int error, i; + int error; struct xfs_buf *bp; DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks); @@ -947,41 +949,25 @@ xfs_buf_get_uncached( /* flags might contain irrelevant bits, pass only what we care about */ error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp); if (error) - goto fail; + return error; - page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT; - error = _xfs_buf_get_pages(bp, page_count); + error = xfs_buf_alloc_pages(bp, flags); if (error) goto fail_free_buf; - for (i = 0; i < page_count; i++) { - bp->b_pages[i] = alloc_page(xb_to_gfp(flags)); - if (!bp->b_pages[i]) { - error = -ENOMEM; - goto fail_free_mem; - } - } - bp->b_flags |= _XBF_PAGES; - error = _xfs_buf_map_pages(bp, 0); if (unlikely(error)) { xfs_warn(target->bt_mount, "%s: failed to map pages", __func__); - goto fail_free_mem; + goto fail_free_buf; } trace_xfs_buf_get_uncached(bp, _RET_IP_); *bpp = bp; return 0; - fail_free_mem: - while (--i >= 0) - __free_page(bp->b_pages[i]); - _xfs_buf_free_pages(bp); - fail_free_buf: - xfs_buf_free_maps(bp); - kmem_cache_free(xfs_buf_zone, bp); - fail: +fail_free_buf: + xfs_buf_free(bp); return error; } @@ -992,7 +978,7 @@ xfs_buf_get_uncached( */ void xfs_buf_hold( - xfs_buf_t *bp) + struct xfs_buf *bp) { trace_xfs_buf_hold(bp, _RET_IP_); atomic_inc(&bp->b_hold); @@ -1004,7 +990,7 @@ xfs_buf_hold( */ void xfs_buf_rele( - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_perag *pag = bp->b_pag; bool release; @@ -1148,7 +1134,7 @@ xfs_buf_unlock( STATIC void xfs_buf_wait_unpin( - xfs_buf_t *bp) + struct xfs_buf *bp) { DECLARE_WAITQUEUE (wait, current); @@ -1166,20 +1152,145 @@ xfs_buf_wait_unpin( set_current_state(TASK_RUNNING); } +static void +xfs_buf_ioerror_alert_ratelimited( + struct xfs_buf *bp) +{ + static unsigned long lasttime; + static struct xfs_buftarg *lasttarg; + + if (bp->b_target != lasttarg || + time_after(jiffies, (lasttime + 5*HZ))) { + lasttime = jiffies; + xfs_buf_ioerror_alert(bp, __this_address); + } + lasttarg = bp->b_target; +} + /* - * Buffer Utility Routines + * Account for this latest trip around the retry handler, and decide if + * we've failed enough times to constitute a permanent failure. */ +static bool +xfs_buf_ioerror_permanent( + struct xfs_buf *bp, + struct xfs_error_cfg *cfg) +{ + struct xfs_mount *mp = bp->b_mount; -void + if (cfg->max_retries != XFS_ERR_RETRY_FOREVER && + ++bp->b_retries > cfg->max_retries) + return true; + if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && + time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time)) + return true; + + /* At unmount we may treat errors differently */ + if (xfs_is_unmounting(mp) && mp->m_fail_unmount) + return true; + + return false; +} + +/* + * On a sync write or shutdown we just want to stale the buffer and let the + * caller handle the error in bp->b_error appropriately. + * + * If the write was asynchronous then no one will be looking for the error. If + * this is the first failure of this type, clear the error state and write the + * buffer out again. This means we always retry an async write failure at least + * once, but we also need to set the buffer up to behave correctly now for + * repeated failures. + * + * If we get repeated async write failures, then we take action according to the + * error configuration we have been set up to use. + * + * Returns true if this function took care of error handling and the caller must + * not touch the buffer again. Return false if the caller should proceed with + * normal I/O completion handling. + */ +static bool +xfs_buf_ioend_handle_error( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_mount; + struct xfs_error_cfg *cfg; + + /* + * If we've already shutdown the journal because of I/O errors, there's + * no point in giving this a retry. + */ + if (xlog_is_shutdown(mp->m_log)) + goto out_stale; + + xfs_buf_ioerror_alert_ratelimited(bp); + + /* + * We're not going to bother about retrying this during recovery. + * One strike! + */ + if (bp->b_flags & _XBF_LOGRECOVERY) { + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + return false; + } + + /* + * Synchronous writes will have callers process the error. + */ + if (!(bp->b_flags & XBF_ASYNC)) + goto out_stale; + + trace_xfs_buf_iodone_async(bp, _RET_IP_); + + cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error); + if (bp->b_last_error != bp->b_error || + !(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) { + bp->b_last_error = bp->b_error; + if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && + !bp->b_first_retry_time) + bp->b_first_retry_time = jiffies; + goto resubmit; + } + + /* + * Permanent error - we need to trigger a shutdown if we haven't already + * to indicate that inconsistency will result from this action. + */ + if (xfs_buf_ioerror_permanent(bp, cfg)) { + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + goto out_stale; + } + + /* Still considered a transient error. Caller will schedule retries. */ + if (bp->b_flags & _XBF_INODES) + xfs_buf_inode_io_fail(bp); + else if (bp->b_flags & _XBF_DQUOTS) + xfs_buf_dquot_io_fail(bp); + else + ASSERT(list_empty(&bp->b_li_list)); + xfs_buf_ioerror(bp, 0); + xfs_buf_relse(bp); + return true; + +resubmit: + xfs_buf_ioerror(bp, 0); + bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL); + xfs_buf_submit(bp); + return true; +out_stale: + xfs_buf_stale(bp); + bp->b_flags |= XBF_DONE; + bp->b_flags &= ~XBF_WRITE; + trace_xfs_buf_error_relse(bp, _RET_IP_); + return false; +} + +static void xfs_buf_ioend( struct xfs_buf *bp) { - bool read = bp->b_flags & XBF_READ; - trace_xfs_buf_iodone(bp, _RET_IP_); - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); - /* * Pull in IO completion errors now. We are guaranteed to be running * single threaded, so we don't need the lock to read b_io_error. @@ -1187,18 +1298,44 @@ xfs_buf_ioend( if (!bp->b_error && bp->b_io_error) xfs_buf_ioerror(bp, bp->b_io_error); - /* Only validate buffers that were read without errors */ - if (read && !bp->b_error && bp->b_ops) { - ASSERT(!bp->b_iodone); - bp->b_ops->verify_read(bp); + if (bp->b_flags & XBF_READ) { + if (!bp->b_error && bp->b_ops) + bp->b_ops->verify_read(bp); + if (!bp->b_error) + bp->b_flags |= XBF_DONE; + } else { + if (!bp->b_error) { + bp->b_flags &= ~XBF_WRITE_FAIL; + bp->b_flags |= XBF_DONE; + } + + if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp)) + return; + + /* clear the retry state */ + bp->b_last_error = 0; + bp->b_retries = 0; + bp->b_first_retry_time = 0; + + /* + * Note that for things like remote attribute buffers, there may + * not be a buffer log item here, so processing the buffer log + * item must remain optional. + */ + if (bp->b_log_item) + xfs_buf_item_done(bp); + + if (bp->b_flags & _XBF_INODES) + xfs_buf_inode_iodone(bp); + else if (bp->b_flags & _XBF_DQUOTS) + xfs_buf_dquot_iodone(bp); + } - if (!bp->b_error) - bp->b_flags |= XBF_DONE; + bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD | + _XBF_LOGRECOVERY); - if (bp->b_iodone) - (*(bp->b_iodone))(bp); - else if (bp->b_flags & XBF_ASYNC) + if (bp->b_flags & XBF_ASYNC) xfs_buf_relse(bp); else complete(&bp->b_iowait); @@ -1209,7 +1346,7 @@ xfs_buf_ioend_work( struct work_struct *work) { struct xfs_buf *bp = - container_of(work, xfs_buf_t, b_ioend_work); + container_of(work, struct xfs_buf, b_ioend_work); xfs_buf_ioend(bp); } @@ -1224,7 +1361,7 @@ xfs_buf_ioend_async( void __xfs_buf_ioerror( - xfs_buf_t *bp, + struct xfs_buf *bp, int error, xfs_failaddr_t failaddr) { @@ -1238,10 +1375,26 @@ xfs_buf_ioerror_alert( struct xfs_buf *bp, xfs_failaddr_t func) { - xfs_alert(bp->b_mount, -"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d", - func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, - -bp->b_error); + xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error", + "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d", + func, (uint64_t)xfs_buf_daddr(bp), + bp->b_length, -bp->b_error); +} + +/* + * To simulate an I/O failure, the buffer must be locked and held with at least + * three references. The LRU reference is dropped by the stale call. The buf + * item reference is dropped via ioend processing. The third reference is owned + * by the caller and is dropped on I/O completion if the buffer is XBF_ASYNC. + */ +void +xfs_buf_ioend_fail( + struct xfs_buf *bp) +{ + bp->b_flags &= ~XBF_DONE; + xfs_buf_stale(bp); + xfs_buf_ioerror(bp, -EIO); + xfs_buf_ioend(bp); } int @@ -1254,7 +1407,7 @@ xfs_bwrite( bp->b_flags |= XBF_WRITE; bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | - XBF_WRITE_FAIL | XBF_DONE); + XBF_DONE); error = xfs_buf_submit(bp); if (error) @@ -1268,6 +1421,11 @@ xfs_buf_bio_end_io( { struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private; + if (!bio->bi_status && + (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) && + XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR)) + bio->bi_status = BLK_STS_IOERR; + /* * don't overwrite existing errors - otherwise we can lose errors on * buffers that require multiple bios to complete. @@ -1292,10 +1450,10 @@ xfs_buf_ioapply_map( int map, int *buf_offset, int *count, - int op) + blk_opf_t op) { int page_index; - int total_nr_pages = bp->b_page_count; + unsigned int total_nr_pages = bp->b_page_count; int nr_pages; struct bio *bio; sector_t sector = bp->b_maps[map].bm_bn; @@ -1320,14 +1478,12 @@ xfs_buf_ioapply_map( next_chunk: atomic_inc(&bp->b_io_remaining); - nr_pages = min(total_nr_pages, BIO_MAX_PAGES); + nr_pages = bio_max_segs(total_nr_pages); - bio = bio_alloc(GFP_NOIO, nr_pages); - bio_set_dev(bio, bp->b_target->bt_bdev); + bio = bio_alloc(bp->b_target->bt_bdev, nr_pages, op, GFP_NOIO); bio->bi_iter.bi_sector = sector; bio->bi_end_io = xfs_buf_bio_end_io; bio->bi_private = bp; - bio->bi_opf = op; for (; size && nr_pages; nr_pages--, page_index++) { int rbytes, nbytes = PAGE_SIZE - offset; @@ -1371,7 +1527,7 @@ _xfs_buf_ioapply( struct xfs_buf *bp) { struct blk_plug plug; - int op; + blk_opf_t op; int offset; int size; int i; @@ -1397,17 +1553,18 @@ _xfs_buf_ioapply( SHUTDOWN_CORRUPT_INCORE); return; } - } else if (bp->b_bn != XFS_BUF_DADDR_NULL) { + } else if (bp->b_rhash_key != XFS_BUF_DADDR_NULL) { struct xfs_mount *mp = bp->b_mount; /* * non-crc filesystems don't attach verifiers during * log recovery, so don't warn for such filesystems. */ - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_has_crc(mp)) { xfs_warn(mp, "%s: no buf ops on daddr 0x%llx len %d", - __func__, bp->b_bn, bp->b_length); + __func__, xfs_buf_daddr(bp), + bp->b_length); xfs_hex_dump(bp->b_addr, XFS_CORRUPTION_DUMP_LEN); dump_stack(); @@ -1463,7 +1620,7 @@ xfs_buf_iowait( * safe to reference the buffer after a call to this function unless the caller * holds an additional reference itself. */ -int +static int __xfs_buf_submit( struct xfs_buf *bp, bool wait) @@ -1474,12 +1631,24 @@ __xfs_buf_submit( ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); - /* on shutdown we stale and complete the buffer immediately */ - if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { - xfs_buf_ioerror(bp, -EIO); - bp->b_flags &= ~XBF_DONE; - xfs_buf_stale(bp); - xfs_buf_ioend(bp); + /* + * On log shutdown we stale and complete the buffer immediately. We can + * be called to read the superblock before the log has been set up, so + * be careful checking the log state. + * + * Checking the mount shutdown state here can result in the log tail + * moving inappropriately on disk as the log may not yet be shut down. + * i.e. failing this buffer on mount shutdown can remove it from the AIL + * and move the tail of the log forwards without having written this + * buffer to disk. This corrupts the log tail state in memory, and + * because the log may not be shut down yet, it can then be propagated + * to disk before the log is shutdown. Hence we check log shutdown + * state here rather than mount state to avoid corrupting the log tail + * on shutdown. + */ + if (bp->b_mount->m_log && + xlog_is_shutdown(bp->b_mount->m_log)) { + xfs_buf_ioend_fail(bp); return -EIO; } @@ -1540,7 +1709,6 @@ xfs_buf_offset( if (bp->b_addr) return bp->b_addr + offset; - offset += bp->b_offset; page = bp->b_pages[offset >> PAGE_SHIFT]; return page_address(page) + (offset & (PAGE_SIZE-1)); } @@ -1573,6 +1741,28 @@ xfs_buf_zero( } /* + * Log a message about and stale a buffer that a caller has decided is corrupt. + * + * This function should be called for the kinds of metadata corruption that + * cannot be detect from a verifier, such as incorrect inter-block relationship + * data. Do /not/ call this function from a verifier function. + * + * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will + * be marked stale, but b_error will not be set. The caller is responsible for + * releasing the buffer or fixing it. + */ +void +__xfs_buf_mark_corrupt( + struct xfs_buf *bp, + xfs_failaddr_t fa) +{ + ASSERT(bp->b_flags & XBF_DONE); + + xfs_buf_corruption_error(bp, fa); + xfs_buf_stale(bp); +} + +/* * Handling of buffer targets (buftargs). */ @@ -1582,7 +1772,7 @@ xfs_buf_zero( * while freeing all the buffers only held by the LRU. */ static enum lru_status -xfs_buftarg_wait_rele( +xfs_buftarg_drain_rele( struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, @@ -1594,7 +1784,7 @@ xfs_buftarg_wait_rele( if (atomic_read(&bp->b_hold) > 1) { /* need to wait, so skip it this pass */ - trace_xfs_buf_wait_buftarg(bp, _RET_IP_); + trace_xfs_buf_drain_buftarg(bp, _RET_IP_); return LRU_SKIP; } if (!spin_trylock(&bp->b_lock)) @@ -1611,13 +1801,13 @@ xfs_buftarg_wait_rele( return LRU_REMOVED; } +/* + * Wait for outstanding I/O on the buftarg to complete. + */ void -xfs_wait_buftarg( +xfs_buftarg_wait( struct xfs_buftarg *btp) { - LIST_HEAD(dispose); - int loop = 0; - /* * First wait on the buftarg I/O count for all in-flight buffers to be * released. This is critical as new buffers do not make the LRU until @@ -1633,10 +1823,21 @@ xfs_wait_buftarg( while (percpu_counter_sum(&btp->bt_io_count)) delay(100); flush_workqueue(btp->bt_mount->m_buf_workqueue); +} + +void +xfs_buftarg_drain( + struct xfs_buftarg *btp) +{ + LIST_HEAD(dispose); + int loop = 0; + bool write_fail = false; + + xfs_buftarg_wait(btp); /* loop until there is nothing left on the lru list. */ while (list_lru_count(&btp->bt_lru)) { - list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, + list_lru_walk(&btp->bt_lru, xfs_buftarg_drain_rele, &dispose, LONG_MAX); while (!list_empty(&dispose)) { @@ -1644,17 +1845,29 @@ xfs_wait_buftarg( bp = list_first_entry(&dispose, struct xfs_buf, b_lru); list_del_init(&bp->b_lru); if (bp->b_flags & XBF_WRITE_FAIL) { - xfs_alert(btp->bt_mount, + write_fail = true; + xfs_buf_alert_ratelimited(bp, + "XFS: Corruption Alert", "Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!", - (long long)bp->b_bn); - xfs_alert(btp->bt_mount, -"Please run xfs_repair to determine the extent of the problem."); + (long long)xfs_buf_daddr(bp)); } xfs_buf_rele(bp); } if (loop++ != 0) delay(100); } + + /* + * If one or more failed buffers were freed, that means dirty metadata + * was thrown away. This should only ever happen after I/O completion + * handling has elevated I/O error(s) to permanent failures and shuts + * down the journal. + */ + if (write_fail) { + ASSERT(xlog_is_shutdown(btp->bt_mount->m_log)); + xfs_alert(btp->bt_mount, + "Please run xfs_repair to determine the extent of the problem."); + } } static enum lru_status @@ -1731,7 +1944,8 @@ xfs_free_buftarg( percpu_counter_destroy(&btp->bt_io_count); list_lru_destroy(&btp->bt_lru); - xfs_blkdev_issue_flush(btp); + blkdev_issue_flush(btp->bt_bdev); + fs_put_dax(btp->bt_daxdev, btp->bt_mount); kmem_free(btp); } @@ -1772,20 +1986,31 @@ xfs_setsize_buftarg_early( return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev)); } -xfs_buftarg_t * +struct xfs_buftarg * xfs_alloc_buftarg( struct xfs_mount *mp, - struct block_device *bdev, - struct dax_device *dax_dev) + struct block_device *bdev) { xfs_buftarg_t *btp; + const struct dax_holder_operations *ops = NULL; +#if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE) + ops = &xfs_dax_holder_operations; +#endif btp = kmem_zalloc(sizeof(*btp), KM_NOFS); btp->bt_mount = mp; btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; - btp->bt_daxdev = dax_dev; + btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off, + mp, ops); + + /* + * Buffer IO error rate limiting. Limit it to no more than 10 messages + * per 30 seconds so as to not spam logs too much on repeated errors. + */ + ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ, + DEFAULT_RATELIMIT_BURST); if (xfs_setsize_buftarg_early(btp, bdev)) goto error_free; @@ -1800,7 +2025,8 @@ xfs_alloc_buftarg( btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - if (register_shrinker(&btp->bt_shrinker)) + if (register_shrinker(&btp->bt_shrinker, "xfs-buf:%s", + mp->m_super->s_id)) goto error_pcpu; return btp; @@ -1890,9 +2116,9 @@ xfs_buf_delwri_queue( */ static int xfs_buf_cmp( - void *priv, - struct list_head *a, - struct list_head *b) + void *priv, + const struct list_head *a, + const struct list_head *b) { struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); @@ -1927,12 +2153,13 @@ xfs_buf_delwri_submit_buffers( blk_start_plug(&plug); list_for_each_entry_safe(bp, n, buffer_list, b_list) { if (!wait_list) { + if (!xfs_buf_trylock(bp)) + continue; if (xfs_buf_ispinned(bp)) { + xfs_buf_unlock(bp); pinned++; continue; } - if (!xfs_buf_trylock(bp)) - continue; } else { xfs_buf_lock(bp); } @@ -1957,7 +2184,7 @@ xfs_buf_delwri_submit_buffers( * synchronously. Otherwise, drop the buffer from the delwri * queue and submit async. */ - bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_WRITE_FAIL); + bp->b_flags &= ~_XBF_DELWRI_Q; bp->b_flags |= XBF_WRITE; if (wait_list) { bp->b_flags &= ~XBF_ASYNC; @@ -2088,27 +2315,6 @@ xfs_buf_delwri_pushbuf( return error; } -int __init -xfs_buf_init(void) -{ - xfs_buf_zone = kmem_cache_create("xfs_buf", - sizeof(struct xfs_buf), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!xfs_buf_zone) - goto out; - - return 0; - - out: - return -ENOMEM; -} - -void -xfs_buf_terminate(void) -{ - kmem_cache_destroy(xfs_buf_zone); -} - void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) { /* @@ -2135,7 +2341,7 @@ xfs_verify_magic( struct xfs_mount *mp = bp->b_mount; int idx; - idx = xfs_sb_version_hascrc(&mp->m_sb); + idx = xfs_has_crc(mp); if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx])) return false; return dmagic == bp->b_ops->magic[idx]; @@ -2153,7 +2359,7 @@ xfs_verify_magic16( struct xfs_mount *mp = bp->b_mount; int idx; - idx = xfs_sb_version_hascrc(&mp->m_sb); + idx = xfs_has_crc(mp); if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx])) return false; return dmagic == bp->b_ops->magic16[idx]; |