From e9c4973638beff4b346d802fd9443975b041035d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:09:12 +1000 Subject: xfs: move xfs_bmbt_to_iomap up We'll need it earlier in the file soon, so the unchanged function to the top of xfs_iomap.c Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 2af0dda1c978..ba3592f3b98c 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -44,6 +44,32 @@ << mp->m_writeio_log) #define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP +void +xfs_bmbt_to_iomap( + struct xfs_inode *ip, + struct iomap *iomap, + struct xfs_bmbt_irec *imap) +{ + struct xfs_mount *mp = ip->i_mount; + + if (imap->br_startblock == HOLESTARTBLOCK) { + iomap->blkno = IOMAP_NULL_BLOCK; + iomap->type = IOMAP_HOLE; + } else if (imap->br_startblock == DELAYSTARTBLOCK) { + iomap->blkno = IOMAP_NULL_BLOCK; + iomap->type = IOMAP_DELALLOC; + } else { + iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock); + if (imap->br_state == XFS_EXT_UNWRITTEN) + iomap->type = IOMAP_UNWRITTEN; + else + iomap->type = IOMAP_MAPPED; + } + iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff); + iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount); + iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); +} + STATIC int xfs_iomap_eof_align_last_fsb( xfs_mount_t *mp, @@ -947,32 +973,6 @@ error_on_bmapi_transaction: return error; } -void -xfs_bmbt_to_iomap( - struct xfs_inode *ip, - struct iomap *iomap, - struct xfs_bmbt_irec *imap) -{ - struct xfs_mount *mp = ip->i_mount; - - if (imap->br_startblock == HOLESTARTBLOCK) { - iomap->blkno = IOMAP_NULL_BLOCK; - iomap->type = IOMAP_HOLE; - } else if (imap->br_startblock == DELAYSTARTBLOCK) { - iomap->blkno = IOMAP_NULL_BLOCK; - iomap->type = IOMAP_DELALLOC; - } else { - iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock); - if (imap->br_state == XFS_EXT_UNWRITTEN) - iomap->type = IOMAP_UNWRITTEN; - else - iomap->type = IOMAP_MAPPED; - } - iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff); - iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount); - iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); -} - static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps) { return !nimaps || -- cgit v1.2.3-59-g8ed1b From f8e3a8257538af8dbdd15d098c0cfba6ccbabe7a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:09:28 +1000 Subject: xfs: factor our a helper to calculate the EOF alignment And drop the pointless mp argument to xfs_iomap_eof_align_last_fsb, while we're at it. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ba3592f3b98c..918511ae115c 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -70,15 +70,13 @@ xfs_bmbt_to_iomap( iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip)); } -STATIC int -xfs_iomap_eof_align_last_fsb( - xfs_mount_t *mp, - xfs_inode_t *ip, - xfs_extlen_t extsize, - xfs_fileoff_t *last_fsb) +static xfs_extlen_t +xfs_eof_alignment( + struct xfs_inode *ip, + xfs_extlen_t extsize) { - xfs_extlen_t align = 0; - int eof, error; + struct xfs_mount *mp = ip->i_mount; + xfs_extlen_t align = 0; if (!XFS_IS_REALTIME_INODE(ip)) { /* @@ -109,8 +107,21 @@ xfs_iomap_eof_align_last_fsb( align = extsize; } + return align; +} + +STATIC int +xfs_iomap_eof_align_last_fsb( + struct xfs_inode *ip, + xfs_extlen_t extsize, + xfs_fileoff_t *last_fsb) +{ + xfs_extlen_t align = xfs_eof_alignment(ip, extsize); + if (align) { xfs_fileoff_t new_last_fsb = roundup_64(*last_fsb, align); + int eof, error; + error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); if (error) return error; @@ -180,7 +191,7 @@ xfs_iomap_write_direct( */ ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags & XFS_IFEXTENTS); - error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); + error = xfs_iomap_eof_align_last_fsb(ip, extsz, &last_fsb); if (error) goto out_unlock; } else { @@ -638,7 +649,7 @@ retry: } if (prealloc || extsz) { - error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); + error = xfs_iomap_eof_align_last_fsb(ip, extsz, &last_fsb); if (error) return error; } -- cgit v1.2.3-59-g8ed1b From 51446f5ba44874db4d2a93a6eb61b133e5ec1b3e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:10:21 +1000 Subject: xfs: rewrite and optimize the delalloc write path Currently xfs_iomap_write_delay does up to lookups in the inode extent tree, which is rather costly especially with the new iomap based write path and small write sizes. But it turns out that the low-level xfs_bmap_search_extents gives us all the information we need in the regular delalloc buffered write path: - it will return us an extent covering the block we are looking up if it exists. In that case we can simply return that extent to the caller and are done - it will tell us if we are beyoned the last current allocated block with an eof return parameter. In that case we can create a delalloc reservation and use the also returned information about the last extent in the file as the hint to size our delalloc reservation. - it can tell us that we are writing into a hole, but that there is an extent beyoned this hole. In this case we can create a delalloc reservation that covers the requested size (possible capped to the next existing allocation). All that can be done in one single routine instead of bouncing up and down a few layers. This reduced the CPU overhead of the block mapping routines and also simplified the code a lot. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 89 +---------- fs/xfs/libxfs/xfs_bmap.h | 10 +- fs/xfs/xfs_iomap.c | 395 +++++++++++++++++++++-------------------------- fs/xfs/xfs_iomap.h | 2 - 4 files changed, 181 insertions(+), 315 deletions(-) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b060bca93402..614803bc8a9f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1388,7 +1388,7 @@ xfs_bmap_search_multi_extents( * Else, *lastxp will be set to the index of the found * entry; *gotp will contain the entry. */ -STATIC xfs_bmbt_rec_host_t * /* pointer to found extent entry */ +xfs_bmbt_rec_host_t * /* pointer to found extent entry */ xfs_bmap_search_extents( xfs_inode_t *ip, /* incore inode pointer */ xfs_fileoff_t bno, /* block number searched for */ @@ -4074,7 +4074,7 @@ xfs_bmapi_read( return 0; } -STATIC int +int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, xfs_fileoff_t aoff, @@ -4170,91 +4170,6 @@ out_unreserve_quota: return error; } -/* - * Map file blocks to filesystem blocks, adding delayed allocations as needed. - */ -int -xfs_bmapi_delay( - struct xfs_inode *ip, /* incore inode */ - xfs_fileoff_t bno, /* starting file offs. mapped */ - xfs_filblks_t len, /* length to map in file */ - struct xfs_bmbt_irec *mval, /* output: map values */ - int *nmap, /* i/o: mval size/count */ - int flags) /* XFS_BMAPI_... */ -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - struct xfs_bmbt_irec got; /* current file extent record */ - struct xfs_bmbt_irec prev; /* previous file extent record */ - xfs_fileoff_t obno; /* old block number (offset) */ - xfs_fileoff_t end; /* end of mapped file region */ - xfs_extnum_t lastx; /* last useful extent number */ - int eof; /* we've hit the end of extents */ - int n = 0; /* current extent index */ - int error = 0; - - ASSERT(*nmap >= 1); - ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); - ASSERT(!(flags & ~XFS_BMAPI_ENTIRE)); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (unlikely(XFS_TEST_ERROR( - (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), - mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { - XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp); - return -EFSCORRUPTED; - } - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - XFS_STATS_INC(mp, xs_blk_mapw); - - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); - if (error) - return error; - } - - xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev); - end = bno + len; - obno = bno; - - while (bno < end && n < *nmap) { - if (eof || got.br_startoff > bno) { - error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got, - &prev, &lastx, eof); - if (error) { - if (n == 0) { - *nmap = 0; - return error; - } - break; - } - } - - /* set up the extent map to return. */ - xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); - xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); - - /* If we're done, stop now. */ - if (bno >= end || n >= *nmap) - break; - - /* Else go on to the next record. */ - prev = got; - if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got); - else - eof = 1; - } - - *nmap = n; - return 0; -} - - static int xfs_bmapi_allocate( struct xfs_bmalloca *bma) diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 254034f96941..d66006960fbc 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -181,9 +181,6 @@ int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip, int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, struct xfs_bmbt_irec *mval, int *nmap, int flags); -int xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno, - xfs_filblks_t len, struct xfs_bmbt_irec *mval, - int *nmap, int flags); int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, int flags, xfs_fsblock_t *firstblock, xfs_extlen_t total, @@ -202,5 +199,12 @@ int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_defer_ops *dfops, enum shift_direction direction, int num_exts); int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); +struct xfs_bmbt_rec_host * + xfs_bmap_search_extents(struct xfs_inode *ip, xfs_fileoff_t bno, + int fork, int *eofp, xfs_extnum_t *lastxp, + struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp); +int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, xfs_fileoff_t aoff, + xfs_filblks_t len, struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *prev, xfs_extnum_t *lastx, int eof); #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 918511ae115c..f96c8ffce5f4 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * Copyright (c) 2016 Christoph Hellwig. * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -42,7 +43,6 @@ #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ << mp->m_writeio_log) -#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP void xfs_bmbt_to_iomap( @@ -311,130 +311,6 @@ out_trans_cancel: goto out_unlock; } -/* - * If the caller is doing a write at the end of the file, then extend the - * allocation out to the file system's write iosize. We clean up any extra - * space left over when the file is closed in xfs_inactive(). - * - * If we find we already have delalloc preallocation beyond EOF, don't do more - * preallocation as it it not needed. - */ -STATIC int -xfs_iomap_eof_want_preallocate( - xfs_mount_t *mp, - xfs_inode_t *ip, - xfs_off_t offset, - size_t count, - xfs_bmbt_irec_t *imap, - int nimaps, - int *prealloc) -{ - xfs_fileoff_t start_fsb; - xfs_filblks_t count_fsb; - int n, error, imaps; - int found_delalloc = 0; - - *prealloc = 0; - if (offset + count <= XFS_ISIZE(ip)) - return 0; - - /* - * If the file is smaller than the minimum prealloc and we are using - * dynamic preallocation, don't do any preallocation at all as it is - * likely this is the only write to the file that is going to be done. - */ - if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) && - XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)) - return 0; - - /* - * If there are any real blocks past eof, then don't - * do any speculative allocation. - */ - start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1))); - count_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); - while (count_fsb > 0) { - imaps = nimaps; - error = xfs_bmapi_read(ip, start_fsb, count_fsb, imap, &imaps, - 0); - if (error) - return error; - for (n = 0; n < imaps; n++) { - if ((imap[n].br_startblock != HOLESTARTBLOCK) && - (imap[n].br_startblock != DELAYSTARTBLOCK)) - return 0; - start_fsb += imap[n].br_blockcount; - count_fsb -= imap[n].br_blockcount; - - if (imap[n].br_startblock == DELAYSTARTBLOCK) - found_delalloc = 1; - } - } - if (!found_delalloc) - *prealloc = 1; - return 0; -} - -/* - * Determine the initial size of the preallocation. We are beyond the current - * EOF here, but we need to take into account whether this is a sparse write or - * an extending write when determining the preallocation size. Hence we need to - * look up the extent that ends at the current write offset and use the result - * to determine the preallocation size. - * - * If the extent is a hole, then preallocation is essentially disabled. - * Otherwise we take the size of the preceeding data extent as the basis for the - * preallocation size. If the size of the extent is greater than half the - * maximum extent length, then use the current offset as the basis. This ensures - * that for large files the preallocation size always extends to MAXEXTLEN - * rather than falling short due to things like stripe unit/width alignment of - * real extents. - */ -STATIC xfs_fsblock_t -xfs_iomap_eof_prealloc_initial_size( - struct xfs_mount *mp, - struct xfs_inode *ip, - xfs_off_t offset, - xfs_bmbt_irec_t *imap, - int nimaps) -{ - xfs_fileoff_t start_fsb; - int imaps = 1; - int error; - - ASSERT(nimaps >= imaps); - - /* if we are using a specific prealloc size, return now */ - if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) - return 0; - - /* If the file is small, then use the minimum prealloc */ - if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign)) - return 0; - - /* - * As we write multiple pages, the offset will always align to the - * start of a page and hence point to a hole at EOF. i.e. if the size is - * 4096 bytes, we only have one block at FSB 0, but XFS_B_TO_FSB(4096) - * will return FSB 1. Hence if there are blocks in the file, we want to - * point to the block prior to the EOF block and not the hole that maps - * directly at @offset. - */ - start_fsb = XFS_B_TO_FSB(mp, offset); - if (start_fsb) - start_fsb--; - error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE); - if (error) - return 0; - - ASSERT(imaps == 1); - if (imap[0].br_startblock == HOLESTARTBLOCK) - return 0; - if (imap[0].br_blockcount <= (MAXEXTLEN >> 1)) - return imap[0].br_blockcount << 1; - return XFS_B_TO_FSB(mp, offset); -} - STATIC bool xfs_quota_need_throttle( struct xfs_inode *ip, @@ -496,27 +372,76 @@ xfs_quota_calc_throttle( } /* + * If we are doing a write at the end of the file and there are no allocations + * past this one, then extend the allocation out to the file system's write + * iosize. + * * If we don't have a user specified preallocation size, dynamically increase - * the preallocation size as the size of the file grows. Cap the maximum size + * the preallocation size as the size of the file grows. Cap the maximum size * at a single extent or less if the filesystem is near full. The closer the * filesystem is to full, the smaller the maximum prealocation. + * + * As an exception we don't do any preallocation at all if the file is smaller + * than the minimum preallocation and we are using the default dynamic + * preallocation scheme, as it is likely this is the only write to the file that + * is going to be done. + * + * We clean up any extra space left over when the file is closed in + * xfs_inactive(). */ STATIC xfs_fsblock_t xfs_iomap_prealloc_size( - struct xfs_mount *mp, struct xfs_inode *ip, - xfs_off_t offset, - struct xfs_bmbt_irec *imap, - int nimaps) + loff_t offset, + loff_t count, + xfs_extnum_t idx, + struct xfs_bmbt_irec *prev) { - xfs_fsblock_t alloc_blocks = 0; + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); int shift = 0; int64_t freesp; xfs_fsblock_t qblocks; int qshift = 0; + xfs_fsblock_t alloc_blocks = 0; + + if (offset + count <= XFS_ISIZE(ip)) + return 0; + + if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) && + (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks))) + return 0; - alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset, - imap, nimaps); + /* + * If an explicit allocsize is set, the file is small, or we + * are writing behind a hole, then use the minimum prealloc: + */ + if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) || + XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) || + idx == 0 || + prev->br_startoff + prev->br_blockcount < offset_fsb) + return mp->m_writeio_blocks; + + /* + * Determine the initial size of the preallocation. We are beyond the + * current EOF here, but we need to take into account whether this is + * a sparse write or an extending write when determining the + * preallocation size. Hence we need to look up the extent that ends + * at the current write offset and use the result to determine the + * preallocation size. + * + * If the extent is a hole, then preallocation is essentially disabled. + * Otherwise we take the size of the preceding data extent as the basis + * for the preallocation size. If the size of the extent is greater than + * half the maximum extent length, then use the current offset as the + * basis. This ensures that for large files the preallocation size + * always extends to MAXEXTLEN rather than falling short due to things + * like stripe unit/width alignment of real extents. + */ + if (prev->br_blockcount <= (MAXEXTLEN >> 1)) + alloc_blocks = prev->br_blockcount << 1; + else + alloc_blocks = XFS_B_TO_FSB(mp, offset); if (!alloc_blocks) goto check_writeio; qblocks = alloc_blocks; @@ -587,120 +512,145 @@ xfs_iomap_prealloc_size( */ while (alloc_blocks && alloc_blocks >= freesp) alloc_blocks >>= 4; - check_writeio: if (alloc_blocks < mp->m_writeio_blocks) alloc_blocks = mp->m_writeio_blocks; - trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift, mp->m_writeio_blocks); - return alloc_blocks; } -int -xfs_iomap_write_delay( - xfs_inode_t *ip, - xfs_off_t offset, - size_t count, - xfs_bmbt_irec_t *ret_imap) +static int +xfs_file_iomap_begin_delay( + struct inode *inode, + loff_t offset, + loff_t count, + unsigned flags, + struct iomap *iomap) { - xfs_mount_t *mp = ip->i_mount; - xfs_fileoff_t offset_fsb; - xfs_fileoff_t last_fsb; - xfs_off_t aligned_offset; - xfs_fileoff_t ioalign; - xfs_extlen_t extsz; - int nimaps; - xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; - int prealloc; - int error; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - /* - * Make sure that the dquots are there. This doesn't hold - * the ilock across a disk read. - */ - error = xfs_qm_dqattach_locked(ip, 0); - if (error) - return error; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t maxbytes_fsb = + XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); + xfs_fileoff_t end_fsb, orig_end_fsb; + int error = 0, eof = 0; + struct xfs_bmbt_irec got; + struct xfs_bmbt_irec prev; + xfs_extnum_t idx; - extsz = xfs_get_extsz_hint(ip); - offset_fsb = XFS_B_TO_FSBT(mp, offset); + ASSERT(!XFS_IS_REALTIME_INODE(ip)); + ASSERT(!xfs_get_extsz_hint(ip)); - error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, - imap, XFS_WRITE_IMAPS, &prealloc); - if (error) - return error; + xfs_ilock(ip, XFS_ILOCK_EXCL); -retry: - if (prealloc) { - xfs_fsblock_t alloc_blocks; + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + error = -EFSCORRUPTED; + goto out_unlock; + } - alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap, - XFS_WRITE_IMAPS); + XFS_STATS_INC(mp, xs_blk_mapw); - aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); - ioalign = XFS_B_TO_FSBT(mp, aligned_offset); - last_fsb = ioalign + alloc_blocks; - } else { - last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); + if (error) + goto out_unlock; } - if (prealloc || extsz) { - error = xfs_iomap_eof_align_last_fsb(ip, extsz, &last_fsb); - if (error) - return error; + xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, + &got, &prev); + if (!eof && got.br_startoff <= offset_fsb) { + trace_xfs_iomap_found(ip, offset, count, 0, &got); + goto done; } + error = xfs_qm_dqattach_locked(ip, 0); + if (error) + goto out_unlock; + /* - * Make sure preallocation does not create extents beyond the range we - * actually support in this filesystem. + * We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages + * to keep the chunks of work done where somewhat symmetric with the + * work writeback does. This is a completely arbitrary number pulled + * out of thin air as a best guess for initial testing. + * + * Note that the values needs to be less than 32-bits wide until + * the lower level functions are updated. */ - if (last_fsb > XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes)) - last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); + count = min_t(loff_t, count, 1024 * PAGE_SIZE); + end_fsb = orig_end_fsb = + min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); - ASSERT(last_fsb > offset_fsb); + if (eof) { + xfs_fsblock_t prealloc_blocks; - nimaps = XFS_WRITE_IMAPS; - error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb, - imap, &nimaps, XFS_BMAPI_ENTIRE); + prealloc_blocks = + xfs_iomap_prealloc_size(ip, offset, count, idx, &prev); + if (prealloc_blocks) { + xfs_extlen_t align; + xfs_off_t end_offset; + + end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1); + end_fsb = XFS_B_TO_FSBT(mp, end_offset) + + prealloc_blocks; + + align = xfs_eof_alignment(ip, 0); + if (align) + end_fsb = roundup_64(end_fsb, align); + + end_fsb = min(end_fsb, maxbytes_fsb); + ASSERT(end_fsb > offset_fsb); + } + } + +retry: + error = xfs_bmapi_reserve_delalloc(ip, offset_fsb, + end_fsb - offset_fsb, &got, + &prev, &idx, eof); switch (error) { case 0: + break; case -ENOSPC: case -EDQUOT: - break; - default: - return error; - } - - /* - * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. Retry - * without EOF preallocation. - */ - if (nimaps == 0) { + /* retry without any preallocation */ trace_xfs_delalloc_enospc(ip, offset, count); - if (prealloc) { - prealloc = 0; - error = 0; + if (end_fsb != orig_end_fsb) { + end_fsb = orig_end_fsb; goto retry; } - return error ? error : -ENOSPC; + /*FALLTHRU*/ + default: + goto out_unlock; } - if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) - return xfs_alert_fsblock_zero(ip, &imap[0]); - /* * Tag the inode as speculatively preallocated so we can reclaim this * space on demand, if necessary. */ - if (prealloc) + if (end_fsb != orig_end_fsb) xfs_inode_set_eofblocks_tag(ip); - *ret_imap = imap[0]; - return 0; + trace_xfs_iomap_alloc(ip, offset, count, 0, &got); +done: + if (isnullstartblock(got.br_startblock)) + got.br_startblock = DELAYSTARTBLOCK; + + if (!got.br_startblock) { + error = xfs_alert_fsblock_zero(ip, &got); + if (error) + goto out_unlock; + } + + xfs_bmbt_to_iomap(ip, iomap, &got); + +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; } /* @@ -1008,6 +958,11 @@ xfs_file_iomap_begin( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; + if ((flags & IOMAP_WRITE) && !xfs_get_extsz_hint(ip)) { + return xfs_file_iomap_begin_delay(inode, offset, length, flags, + iomap); + } + xfs_ilock(ip, XFS_ILOCK_EXCL); ASSERT(offset <= mp->m_super->s_maxbytes); @@ -1035,19 +990,13 @@ xfs_file_iomap_begin( * the lower level functions are updated. */ length = min_t(loff_t, length, 1024 * PAGE_SIZE); - if (xfs_get_extsz_hint(ip)) { - /* - * xfs_iomap_write_direct() expects the shared lock. It - * is unlocked on return. - */ - xfs_ilock_demote(ip, XFS_ILOCK_EXCL); - error = xfs_iomap_write_direct(ip, offset, length, &imap, - nimaps); - } else { - error = xfs_iomap_write_delay(ip, offset, length, &imap); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } - + /* + * xfs_iomap_write_direct() expects the shared lock. It + * is unlocked on return. + */ + xfs_ilock_demote(ip, XFS_ILOCK_EXCL); + error = xfs_iomap_write_direct(ip, offset, length, &imap, + nimaps); if (error) return error; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index fb8aca3d69ab..6498be485932 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -25,8 +25,6 @@ struct xfs_bmbt_irec; int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *, int); -int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, - struct xfs_bmbt_irec *); int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, struct xfs_bmbt_irec *); int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); -- cgit v1.2.3-59-g8ed1b From ecd50729f772f0b982ddbb76c16ee4bc8f495e17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:24:37 +1000 Subject: iomap: add IOMAP_F_NEW flag Signed-off-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 1 + include/linux/iomap.h | 1 + 2 files changed, 2 insertions(+) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index f96c8ffce5f4..ac677d032e3a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1000,6 +1000,7 @@ xfs_file_iomap_begin( if (error) return error; + iomap->flags = IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); } else { ASSERT(nimaps); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 3d70ece10313..14d7067f8b06 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -22,6 +22,7 @@ struct vm_fault; * Flags for iomap mappings: */ #define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */ +#define IOMAP_F_NEW 0x02 /* blocks have been newly allocated */ /* * Magic value for blkno: -- cgit v1.2.3-59-g8ed1b From 66642c5c1dea411dd2842159f9f297ce8e914994 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:26:39 +1000 Subject: xfs: take the ilock shared if possible in xfs_file_iomap_begin We always just read the extent first, and will later lock exlusively after first dropping the lock in case we actually allocate blocks. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ac677d032e3a..fe4a26d752d0 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -954,6 +954,7 @@ xfs_file_iomap_begin( struct xfs_bmbt_irec imap; xfs_fileoff_t offset_fsb, end_fsb; int nimaps = 1, error = 0; + unsigned lockmode; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; @@ -963,7 +964,7 @@ xfs_file_iomap_begin( iomap); } - xfs_ilock(ip, XFS_ILOCK_EXCL); + lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) @@ -974,7 +975,7 @@ xfs_file_iomap_begin( error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, XFS_BMAPI_ENTIRE); if (error) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); return error; } @@ -994,7 +995,8 @@ xfs_file_iomap_begin( * xfs_iomap_write_direct() expects the shared lock. It * is unlocked on return. */ - xfs_ilock_demote(ip, XFS_ILOCK_EXCL); + if (lockmode == XFS_ILOCK_EXCL) + xfs_ilock_demote(ip, lockmode); error = xfs_iomap_write_direct(ip, offset, length, &imap, nimaps); if (error) @@ -1005,7 +1007,7 @@ xfs_file_iomap_begin( } else { ASSERT(nimaps); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); trace_xfs_iomap_found(ip, offset, length, 0, &imap); } -- cgit v1.2.3-59-g8ed1b From 6c31f495d19975b7d2e824ee614934d5db113afe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:28:38 +1000 Subject: xfs: use iomap to implement DAX Another users of buffer_heads bytes the dust. Signed-off-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 61 +++++++++++++++--------------------------------------- fs/xfs/xfs_iomap.c | 11 ++++++---- 2 files changed, 24 insertions(+), 48 deletions(-) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 62649ccdbb4d..f99d7fac5abf 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -332,10 +332,7 @@ xfs_file_dax_read( struct kiocb *iocb, struct iov_iter *to) { - struct address_space *mapping = iocb->ki_filp->f_mapping; - struct inode *inode = mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct iov_iter data = *to; + struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host); size_t count = iov_iter_count(to); ssize_t ret = 0; @@ -345,11 +342,7 @@ xfs_file_dax_read( return 0; /* skip atime */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0); - if (ret > 0) { - iocb->ki_pos += ret; - iov_iter_advance(to, ret); - } + ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); file_accessed(iocb->ki_filp); @@ -711,52 +704,32 @@ xfs_file_dax_write( struct kiocb *iocb, struct iov_iter *from) { - struct address_space *mapping = iocb->ki_filp->f_mapping; - struct inode *inode = mapping->host; + struct inode *inode = iocb->ki_filp->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); - ssize_t ret = 0; int iolock = XFS_IOLOCK_EXCL; - struct iov_iter data; + ssize_t ret, error = 0; + size_t count; + loff_t pos; xfs_rw_ilock(ip, iolock); ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; - /* - * Yes, even DAX files can have page cache attached to them: A zeroed - * page is inserted into the pagecache when we have to serve a write - * fault on a hole. It should never be dirtied and can simply be - * dropped from the pagecache once we get real data for the page. - * - * XXX: This is racy against mmap, and there's nothing we can do about - * it. dax_do_io() should really do this invalidation internally as - * it will know if we've allocated over a holei for this specific IO and - * if so it needs to update the mapping tree and invalidate existing - * PTEs over the newly allocated range. Remove this invalidation when - * dax_do_io() is fixed up. - */ - if (mapping->nrpages) { - loff_t end = iocb->ki_pos + iov_iter_count(from) - 1; - - ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, - end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } + pos = iocb->ki_pos; + count = iov_iter_count(from); - trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos); + trace_xfs_file_dax_write(ip, count, pos); - data = *from; - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, - xfs_end_io_direct_write, 0); - if (ret > 0) { - iocb->ki_pos += ret; - iov_iter_advance(from, ret); + ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops); + if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { + i_size_write(inode, iocb->ki_pos); + error = xfs_setfilesize(ip, pos, ret); } + out: xfs_rw_iunlock(ip, iolock); - return ret; + return error ? error : ret; } STATIC ssize_t @@ -1495,7 +1468,7 @@ xfs_filemap_page_mkwrite( xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault); + ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops); } else { ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops); ret = block_page_mkwrite_return(ret); @@ -1529,7 +1502,7 @@ xfs_filemap_fault( * changes to xfs_get_blocks_direct() to map unwritten extent * ioend for conversion on read-only mappings. */ - ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault); + ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops); } else ret = filemap_fault(vma, vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index fe4a26d752d0..c08253e11545 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -934,11 +934,13 @@ error_on_bmapi_transaction: return error; } -static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps) +static inline bool imap_needs_alloc(struct inode *inode, + struct xfs_bmbt_irec *imap, int nimaps) { return !nimaps || imap->br_startblock == HOLESTARTBLOCK || - imap->br_startblock == DELAYSTARTBLOCK; + imap->br_startblock == DELAYSTARTBLOCK || + (IS_DAX(inode) && ISUNWRITTEN(imap)); } static int @@ -959,7 +961,8 @@ xfs_file_iomap_begin( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if ((flags & IOMAP_WRITE) && !xfs_get_extsz_hint(ip)) { + if ((flags & IOMAP_WRITE) && + !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { return xfs_file_iomap_begin_delay(inode, offset, length, flags, iomap); } @@ -979,7 +982,7 @@ xfs_file_iomap_begin( return error; } - if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) { + if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { /* * We cap the maximum length we map here to MAX_WRITEBACK_PAGES * pages to keep the chunks of work done where somewhat symmetric -- cgit v1.2.3-59-g8ed1b