diff options
Diffstat (limited to 'fs/xfs/xfs_bmap_util.c')
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 674 |
1 files changed, 380 insertions, 294 deletions
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index e62fb5216341..04d0c2bff67c 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -71,18 +71,24 @@ xfs_zero_extent( #ifdef CONFIG_XFS_RT int xfs_bmap_rtalloc( - struct xfs_bmalloca *ap) /* bmap alloc argument struct */ + struct xfs_bmalloca *ap) { - int error; /* error return value */ - xfs_mount_t *mp; /* mount point structure */ - xfs_extlen_t prod = 0; /* product factor for allocators */ - xfs_extlen_t mod = 0; /* product factor for allocators */ - xfs_extlen_t ralen = 0; /* realtime allocation length */ - xfs_extlen_t align; /* minimum allocation alignment */ - xfs_rtblock_t rtb; - - mp = ap->ip->i_mount; + struct xfs_mount *mp = ap->ip->i_mount; + xfs_fileoff_t orig_offset = ap->offset; + xfs_rtblock_t rtb; + xfs_extlen_t prod = 0; /* product factor for allocators */ + xfs_extlen_t mod = 0; /* product factor for allocators */ + xfs_extlen_t ralen = 0; /* realtime allocation length */ + xfs_extlen_t align; /* minimum allocation alignment */ + xfs_extlen_t orig_length = ap->length; + xfs_extlen_t minlen = mp->m_sb.sb_rextsize; + xfs_extlen_t raminlen; + bool rtlocked = false; + bool ignore_locality = false; + int error; + align = xfs_get_extsz_hint(ap->ip); +retry: prod = align / mp->m_sb.sb_rextsize; error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 1, ap->eof, 0, @@ -93,6 +99,15 @@ xfs_bmap_rtalloc( ASSERT(ap->length % mp->m_sb.sb_rextsize == 0); /* + * If we shifted the file offset downward to satisfy an extent size + * hint, increase minlen by that amount so that the allocator won't + * give us an allocation that's too short to cover at least one of the + * blocks that the caller asked for. + */ + if (ap->offset != orig_offset) + minlen += orig_offset - ap->offset; + + /* * If the offset & length are not perfectly aligned * then kill prod, it will just get us in trouble. */ @@ -104,29 +119,32 @@ xfs_bmap_rtalloc( */ ralen = ap->length / mp->m_sb.sb_rextsize; /* - * If the old value was close enough to MAXEXTLEN that + * If the old value was close enough to XFS_BMBT_MAX_EXTLEN that * we rounded up to it, cut it back so it's valid again. * Note that if it's a really large request (bigger than - * MAXEXTLEN), we don't hear about that number, and can't + * XFS_BMBT_MAX_EXTLEN), we don't hear about that number, and can't * adjust the starting point to match it. */ - if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) - ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; + if (ralen * mp->m_sb.sb_rextsize >= XFS_MAX_BMBT_EXTLEN) + ralen = XFS_MAX_BMBT_EXTLEN / mp->m_sb.sb_rextsize; /* * Lock out modifications to both the RT bitmap and summary inodes */ - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP); - xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); - xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM); - xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL); + if (!rtlocked) { + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP); + xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM); + xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL); + rtlocked = true; + } /* * If it's an allocation to an empty file at offset 0, * pick an extent that will space things out in the rt area. */ if (ap->eof && ap->offset == 0) { - xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */ + xfs_rtblock_t rtx; /* realtime extent no */ error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); if (error) @@ -141,33 +159,59 @@ xfs_bmap_rtalloc( /* * Realtime allocation, done through xfs_rtallocate_extent. */ - do_div(ap->blkno, mp->m_sb.sb_rextsize); + if (ignore_locality) + ap->blkno = 0; + else + do_div(ap->blkno, mp->m_sb.sb_rextsize); rtb = ap->blkno; ap->length = ralen; - error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, - &ralen, ap->wasdel, prod, &rtb); + raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize); + error = xfs_rtallocate_extent(ap->tp, ap->blkno, raminlen, ap->length, + &ralen, ap->wasdel, prod, &rtb); if (error) return error; - ap->blkno = rtb; - if (ap->blkno != NULLFSBLOCK) { - ap->blkno *= mp->m_sb.sb_rextsize; - ralen *= mp->m_sb.sb_rextsize; - ap->length = ralen; - ap->ip->i_d.di_nblocks += ralen; + if (rtb != NULLRTBLOCK) { + ap->blkno = rtb * mp->m_sb.sb_rextsize; + ap->length = ralen * mp->m_sb.sb_rextsize; + ap->ip->i_nblocks += ap->length; xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); if (ap->wasdel) - ap->ip->i_delayed_blks -= ralen; + ap->ip->i_delayed_blks -= ap->length; /* * Adjust the disk quota also. This was reserved * earlier. */ xfs_trans_mod_dquot_byino(ap->tp, ap->ip, ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : - XFS_TRANS_DQ_RTBCOUNT, (long) ralen); - } else { - ap->length = 0; + XFS_TRANS_DQ_RTBCOUNT, ap->length); + return 0; } + + if (align > mp->m_sb.sb_rextsize) { + /* + * We previously enlarged the request length to try to satisfy + * an extent size hint. The allocator didn't return anything, + * so reset the parameters to the original values and try again + * without alignment criteria. + */ + ap->offset = orig_offset; + ap->length = orig_length; + minlen = align = mp->m_sb.sb_rextsize; + goto retry; + } + + if (!ignore_locality && ap->blkno != 0) { + /* + * If we can't allocate near a specific rt extent, try again + * without locality criteria. + */ + ignore_locality = true; + goto retry; + } + + ap->blkno = NULLFSBLOCK; + ap->length = 0; return 0; } #endif /* CONFIG_XFS_RT */ @@ -212,7 +256,7 @@ xfs_bmap_count_blocks( xfs_filblks_t *count) { struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); struct xfs_btree_cur *cur; xfs_extlen_t btblocks = 0; int error; @@ -223,13 +267,11 @@ xfs_bmap_count_blocks( if (!ifp) return 0; - switch (XFS_IFORK_FORMAT(ip, whichfork)) { + switch (ifp->if_format) { case XFS_DINODE_FMT_BTREE: - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(tp, ip, whichfork); - if (error) - return error; - } + error = xfs_iread_extents(tp, ip, whichfork); + if (error) + return error; cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); error = xfs_btree_count_blocks(cur, &btblocks); @@ -244,7 +286,7 @@ xfs_bmap_count_blocks( */ *count += btblocks - 1; - /* fall through */ + fallthrough; case XFS_DINODE_FMT_EXTENTS: *nextents = xfs_bmap_count_leaves(ifp, count); break; @@ -397,33 +439,32 @@ xfs_getbmap( whichfork = XFS_COW_FORK; else whichfork = XFS_DATA_FORK; - ifp = XFS_IFORK_PTR(ip, whichfork); xfs_ilock(ip, XFS_IOLOCK_SHARED); switch (whichfork) { case XFS_ATTR_FORK: - if (!XFS_IFORK_Q(ip)) - goto out_unlock_iolock; + lock = xfs_ilock_attr_map_shared(ip); + if (!xfs_inode_has_attr_fork(ip)) + goto out_unlock_ilock; max_len = 1LL << 32; - lock = xfs_ilock_attr_map_shared(ip); break; case XFS_COW_FORK: + lock = XFS_ILOCK_SHARED; + xfs_ilock(ip, lock); + /* No CoW fork? Just return */ - if (!ifp) - goto out_unlock_iolock; + if (!xfs_ifork_ptr(ip, whichfork)) + goto out_unlock_ilock; if (xfs_get_cowextsz_hint(ip)) max_len = mp->m_super->s_maxbytes; else max_len = XFS_ISIZE(ip); - - lock = XFS_ILOCK_SHARED; - xfs_ilock(ip, lock); break; case XFS_DATA_FORK: if (!(iflags & BMV_IF_DELALLOC) && - (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { + (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_disk_size)) { error = filemap_write_and_wait(VFS_I(ip)->i_mapping); if (error) goto out_unlock_iolock; @@ -439,7 +480,7 @@ xfs_getbmap( } if (xfs_get_extsz_hint(ip) || - (ip->i_d.di_flags & + (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))) max_len = mp->m_super->s_maxbytes; else @@ -449,7 +490,9 @@ xfs_getbmap( break; } - switch (XFS_IFORK_FORMAT(ip, whichfork)) { + ifp = xfs_ifork_ptr(ip, whichfork); + + switch (ifp->if_format) { case XFS_DINODE_FMT_EXTENTS: case XFS_DINODE_FMT_BTREE: break; @@ -471,11 +514,9 @@ xfs_getbmap( first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset); len = XFS_BB_TO_FSB(mp, bmv->bmv_length); - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(NULL, ip, whichfork); - if (error) - goto out_unlock_ilock; - } + error = xfs_iread_extents(NULL, ip, whichfork); + if (error) + goto out_unlock_ilock; if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) { /* @@ -558,7 +599,7 @@ xfs_bmap_punch_delalloc_range( struct xfs_iext_cursor icur; int error = 0; - ASSERT(ifp->if_flags & XFS_IFEXTENTS); + ASSERT(!xfs_need_iread_extents(ifp)); xfs_ilock(ip, XFS_ILOCK_EXCL); if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) @@ -597,8 +638,24 @@ out_unlock: * regular files that are marked preallocated or append-only. */ bool -xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) +xfs_can_free_eofblocks( + struct xfs_inode *ip, + bool force) { + struct xfs_bmbt_irec imap; + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t end_fsb; + xfs_fileoff_t last_fsb; + int nimaps = 1; + int error; + + /* + * Caller must either hold the exclusive io lock; or be inactivating + * the inode, which guarantees there are no other users of the inode. + */ + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL) || + (VFS_I(ip)->i_state & I_FREEING)); + /* prealloc/delalloc exists only on regular files */ if (!S_ISREG(VFS_I(ip)->i_mode)) return false; @@ -613,18 +670,45 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) return false; /* If we haven't read in the extent list, then don't do it now. */ - if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) + if (xfs_need_iread_extents(&ip->i_df)) return false; /* * Do not free real preallocated or append-only files unless the file * has delalloc blocks and we are forced to remove them. */ - if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) + if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) if (!force || ip->i_delayed_blks == 0) return false; - return true; + /* + * Do not try to free post-EOF blocks if EOF is beyond the end of the + * range supported by the page cache, because the truncation will loop + * forever. + */ + end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); + if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) + end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize); + last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); + if (last_fsb <= end_fsb) + return false; + + /* + * Look up the mapping for the first block past EOF. If we can't find + * it, there's nothing to free. + */ + xfs_ilock(ip, XFS_ILOCK_SHARED); + error = xfs_bmapi_read(ip, end_fsb, last_fsb - end_fsb, &imap, &nimaps, + 0); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + if (error || nimaps == 0) + return false; + + /* + * If there's a real mapping there or there are delayed allocation + * reservations, then we have post-EOF blocks to try to free. + */ + return imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks; } /* @@ -637,78 +721,52 @@ xfs_free_eofblocks( struct xfs_inode *ip) { struct xfs_trans *tp; - int error; - xfs_fileoff_t end_fsb; - xfs_fileoff_t last_fsb; - xfs_filblks_t map_len; - int nimaps; - struct xfs_bmbt_irec imap; struct xfs_mount *mp = ip->i_mount; + int error; - /* - * Figure out if there are any blocks beyond the end - * of the file. If not, then there is nothing to do. - */ - end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); - last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); - if (last_fsb <= end_fsb) - return 0; - map_len = last_fsb - end_fsb; + /* Attach the dquots to the inode up front. */ + error = xfs_qm_dqattach(ip); + if (error) + return error; - nimaps = 1; - xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); - xfs_iunlock(ip, XFS_ILOCK_SHARED); + /* Wait on dio to ensure i_size has settled. */ + inode_dio_wait(VFS_I(ip)); - /* - * If there are blocks after the end of file, truncate the file to its - * current size to free them up. - */ - if (!error && (nimaps != 0) && - (imap.br_startblock != HOLESTARTBLOCK || - ip->i_delayed_blks)) { - /* - * Attach the dquots to the inode up front. - */ - error = xfs_qm_dqattach(ip); - if (error) - return error; + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + if (error) { + ASSERT(xfs_is_shutdown(mp)); + return error; + } - /* wait on dio to ensure i_size has settled */ - inode_dio_wait(VFS_I(ip)); + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, - &tp); - if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - return error; - } + /* + * Do not update the on-disk file size. If we update the on-disk file + * size and then the system crashes before the contents of the file are + * flushed to disk then the files may be full of holes (ie NULL files + * bug). + */ + error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK, + XFS_ISIZE(ip), XFS_BMAPI_NODISCARD); + if (error) + goto err_cancel; - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); + error = xfs_trans_commit(tp); + if (error) + goto out_unlock; - /* - * Do not update the on-disk file size. If we update the - * on-disk file size and then the system crashes before the - * contents of the file are flushed to disk then the files - * may be full of holes (ie NULL files bug). - */ - error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK, - XFS_ISIZE(ip), XFS_BMAPI_NODISCARD); - if (error) { - /* - * If we get an error at this point we simply don't - * bother truncating the file. - */ - xfs_trans_cancel(tp); - } else { - error = xfs_trans_commit(tp); - if (!error) - xfs_inode_clear_eofblocks_tag(ip); - } + xfs_inode_clear_eofblocks_tag(ip); + goto out_unlock; - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } +err_cancel: + /* + * If we get an error at this point we simply don't + * bother truncating the file. + */ + xfs_trans_cancel(tp); +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -716,8 +774,7 @@ int xfs_alloc_file_space( struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len, - int alloc_type) + xfs_off_t len) { xfs_mount_t *mp = ip->i_mount; xfs_off_t count; @@ -727,16 +784,14 @@ xfs_alloc_file_space( xfs_fileoff_t startoffset_fsb; xfs_fileoff_t endoffset_fsb; int nimaps; - int quota_flag; int rt; xfs_trans_t *tp; xfs_bmbt_irec_t imaps[1], *imapp; - uint qblocks, resblks, resrtextents; int error; trace_xfs_alloc_file_space(ip); - if (XFS_FORCED_SHUTDOWN(mp)) + if (xfs_is_shutdown(mp)) return -EIO; error = xfs_qm_dqattach(ip); @@ -761,6 +816,7 @@ xfs_alloc_file_space( */ while (allocatesize_fsb && !error) { xfs_fileoff_t s, e; + unsigned int dblocks, rblocks, resblks; /* * Determine space reservations for data/realtime. @@ -786,53 +842,41 @@ xfs_alloc_file_space( * count, hence we need to limit the number of blocks we are * trying to reserve to avoid an overflow. We can't allocate * more than @nimaps extents, and an extent is limited on disk - * to MAXEXTLEN (21 bits), so use that to enforce the limit. + * to XFS_BMBT_MAX_EXTLEN (21 bits), so use that to enforce the + * limit. */ - resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); + resblks = min_t(xfs_fileoff_t, (e - s), + (XFS_MAX_BMBT_EXTLEN * nimaps)); if (unlikely(rt)) { - resrtextents = qblocks = resblks; - resrtextents /= mp->m_sb.sb_rextsize; - resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); - quota_flag = XFS_QMOPT_RES_RTBLKS; + dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0); + rblocks = resblks; } else { - resrtextents = 0; - resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); - quota_flag = XFS_QMOPT_RES_REGBLKS; + dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); + rblocks = 0; } - /* - * Allocate and setup the transaction. - */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, - resrtextents, 0, &tp); - - /* - * Check for running out of space - */ - if (error) { - /* - * Free the transaction structure. - */ - ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); - break; - } - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, - 0, quota_flag); + error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, + dblocks, rblocks, false, &tp); if (error) - goto error1; + break; - xfs_trans_ijoin(tp, ip, 0); + error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, + XFS_IEXT_ADD_NOSPLIT_CNT); + if (error == -EFBIG) + error = xfs_iext_count_upgrade(tp, ip, + XFS_IEXT_ADD_NOSPLIT_CNT); + if (error) + goto error; error = xfs_bmapi_write(tp, ip, startoffset_fsb, - allocatesize_fsb, alloc_type, 0, imapp, - &nimaps); + allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp, + &nimaps); if (error) - goto error0; + goto error; + + ip->i_diflags |= XFS_DIFLAG_PREALLOC; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - /* - * Complete the transaction - */ error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) @@ -851,10 +895,7 @@ xfs_alloc_file_space( return error; -error0: /* unlock inode, unreserve quota blocks, cancel trans */ - xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); - -error1: /* Just cancel transaction */ +error: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; @@ -872,20 +913,18 @@ xfs_unmap_extent( uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); int error; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); - if (error) { - ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); + error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, resblks, 0, + false, &tp); + if (error) return error; - } - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot, - ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS); + error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, + XFS_IEXT_PUNCH_HOLE_CNT); + if (error == -EFBIG) + error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT); if (error) goto out_trans_cancel; - xfs_trans_ijoin(tp, ip, 0); - error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, done); if (error) goto out_trans_cancel; @@ -912,7 +951,7 @@ xfs_flush_unmap_range( xfs_off_t rounding, start, end; int error; - rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE); + rounding = max_t(xfs_off_t, mp->m_sb.sb_blocksize, PAGE_SIZE); start = round_down(offset, rounding); end = round_up(offset + len, rounding) - 1; @@ -946,6 +985,14 @@ xfs_free_file_space( startoffset_fsb = XFS_B_TO_FSB(mp, offset); endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); + /* We can only free complete realtime extents. */ + if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) { + startoffset_fsb = roundup_64(startoffset_fsb, + mp->m_sb.sb_rextsize); + endoffset_fsb = rounddown_64(endoffset_fsb, + mp->m_sb.sb_rextsize); + } + /* * Need to zero the stuff we're not freeing, on disk. */ @@ -960,7 +1007,7 @@ xfs_free_file_space( /* * Now that we've unmap all full blocks we'll have to zero out any - * partial block at the beginning and/or end. iomap_zero_range is smart + * partial block at the beginning and/or end. xfs_zero_range is smart * enough to skip any holes, including those we just created, but we * must take care not to zero beyond EOF and enlarge i_size. */ @@ -968,15 +1015,14 @@ xfs_free_file_space( return 0; if (offset + len > XFS_ISIZE(ip)) len = XFS_ISIZE(ip) - offset; - error = iomap_zero_range(VFS_I(ip), offset, len, NULL, - &xfs_buffered_write_iomap_ops); + error = xfs_zero_range(ip, offset, len, NULL); if (error) return error; /* * If we zeroed right up to EOF and EOF straddles a page boundary we * must make sure that the post-EOF area is also zeroed because the - * page could be mmap'd and iomap_zero_range doesn't do that for us. + * page could be mmap'd and xfs_zero_range doesn't do that for us. * Writeback of the eof page will do this, albeit clumsily. */ if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) { @@ -1012,9 +1058,9 @@ xfs_prepare_shift( * extent (after split) during the shift and corrupt the file. Start * with the block just prior to the start to stabilize the boundary. */ - offset = round_down(offset, 1 << mp->m_sb.sb_blocklog); + offset = round_down(offset, mp->m_sb.sb_blocksize); if (offset) - offset -= (1 << mp->m_sb.sb_blocklog); + offset -= mp->m_sb.sb_blocksize; /* * Writeback and invalidate cache for the remainder of the file as we're @@ -1062,7 +1108,6 @@ xfs_collapse_file_space( int error; xfs_fileoff_t next_fsb = XFS_B_TO_FSB(mp, offset + len); xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len); - uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); bool done = false; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); @@ -1078,32 +1123,34 @@ xfs_collapse_file_space( if (error) return error; - while (!error && !done) { - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, - &tp); - if (error) - break; + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); + if (error) + return error; - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, - ip->i_gdquot, ip->i_pdquot, resblks, 0, - XFS_QMOPT_RES_REGBLKS); - if (error) - goto out_trans_cancel; - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + while (!done) { error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb, &done); if (error) goto out_trans_cancel; + if (done) + break; - error = xfs_trans_commit(tp); + /* finish any deferred frees and roll the transaction */ + error = xfs_defer_finish(&tp); + if (error) + goto out_trans_cancel; } + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; out_trans_cancel: xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -1146,35 +1193,48 @@ xfs_insert_file_space( if (error) return error; + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + + error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, + XFS_IEXT_PUNCH_HOLE_CNT); + if (error == -EFBIG) + error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT); + if (error) + goto out_trans_cancel; + /* * The extent shifting code works on extent granularity. So, if stop_fsb * is not the starting block of extent, we need to split the extent at * stop_fsb. */ - error = xfs_bmap_split_extent(ip, stop_fsb); + error = xfs_bmap_split_extent(tp, ip, stop_fsb); if (error) - return error; + goto out_trans_cancel; - while (!error && !done) { - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, - &tp); + do { + error = xfs_defer_finish(&tp); if (error) - break; + goto out_trans_cancel; - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb, &done, stop_fsb); if (error) goto out_trans_cancel; + } while (!done); - error = xfs_trans_commit(tp); - } - + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; out_trans_cancel: xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } @@ -1203,24 +1263,33 @@ xfs_swap_extents_check_format( struct xfs_inode *ip, /* target inode */ struct xfs_inode *tip) /* tmp inode */ { + struct xfs_ifork *ifp = &ip->i_df; + struct xfs_ifork *tifp = &tip->i_df; + + /* User/group/project quota ids must match if quotas are enforced. */ + if (XFS_IS_QUOTA_ON(ip->i_mount) && + (!uid_eq(VFS_I(ip)->i_uid, VFS_I(tip)->i_uid) || + !gid_eq(VFS_I(ip)->i_gid, VFS_I(tip)->i_gid) || + ip->i_projid != tip->i_projid)) + return -EINVAL; /* Should never get a local format */ - if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || - tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) + if (ifp->if_format == XFS_DINODE_FMT_LOCAL || + tifp->if_format == XFS_DINODE_FMT_LOCAL) return -EINVAL; /* * if the target inode has less extents that then temporary inode then * why did userspace call us? */ - if (ip->i_d.di_nextents < tip->i_d.di_nextents) + if (ifp->if_nextents < tifp->if_nextents) return -EINVAL; /* * If we have to use the (expensive) rmap swap method, we can * handle any number of extents and any format. */ - if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb)) + if (xfs_has_rmapbt(ip->i_mount)) return 0; /* @@ -1228,20 +1297,18 @@ xfs_swap_extents_check_format( * form then we will end up with the target inode in the wrong format * as we already know there are less extents in the temp inode. */ - if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && - tip->i_d.di_format == XFS_DINODE_FMT_BTREE) + if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && + tifp->if_format == XFS_DINODE_FMT_BTREE) return -EINVAL; /* Check temp in extent form to max in target */ - if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > - XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) + if (tifp->if_format == XFS_DINODE_FMT_EXTENTS && + tifp->if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) return -EINVAL; /* Check target in extent form to max in temp */ - if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > - XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) + if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && + ifp->if_nextents > XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) return -EINVAL; /* @@ -1253,22 +1320,20 @@ xfs_swap_extents_check_format( * (a common defrag case) which will occur when the temp inode is in * extent format... */ - if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { - if (XFS_IFORK_Q(ip) && - XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) + if (tifp->if_format == XFS_DINODE_FMT_BTREE) { + if (xfs_inode_has_attr_fork(ip) && + XFS_BMAP_BMDR_SPACE(tifp->if_broot) > xfs_inode_fork_boff(ip)) return -EINVAL; - if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= - XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) + if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) return -EINVAL; } /* Reciprocal target->temp btree format checks */ - if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { - if (XFS_IFORK_Q(tip) && - XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) + if (ifp->if_format == XFS_DINODE_FMT_BTREE) { + if (xfs_inode_has_attr_fork(tip) && + XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > xfs_inode_fork_boff(tip)) return -EINVAL; - if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= - XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) + if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) return -EINVAL; } @@ -1320,9 +1385,9 @@ xfs_swap_extent_rmap( * rmap functions when we go to fix up the rmaps. The flags * will be switch for reals later. */ - tip_flags2 = tip->i_d.di_flags2; - if (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK) - tip->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK; + tip_flags2 = tip->i_diflags2; + if (ip->i_diflags2 & XFS_DIFLAG2_REFLINK) + tip->i_diflags2 |= XFS_DIFLAG2_REFLINK; offset_fsb = 0; end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip))); @@ -1364,6 +1429,28 @@ xfs_swap_extent_rmap( irec.br_blockcount); trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec); + if (xfs_bmap_is_real_extent(&uirec)) { + error = xfs_iext_count_may_overflow(ip, + XFS_DATA_FORK, + XFS_IEXT_SWAP_RMAP_CNT); + if (error == -EFBIG) + error = xfs_iext_count_upgrade(tp, ip, + XFS_IEXT_SWAP_RMAP_CNT); + if (error) + goto out; + } + + if (xfs_bmap_is_real_extent(&irec)) { + error = xfs_iext_count_may_overflow(tip, + XFS_DATA_FORK, + XFS_IEXT_SWAP_RMAP_CNT); + if (error == -EFBIG) + error = xfs_iext_count_upgrade(tp, ip, + XFS_IEXT_SWAP_RMAP_CNT); + if (error) + goto out; + } + /* Remove the mapping from the donor file. */ xfs_bmap_unmap_extent(tp, tip, &uirec); @@ -1393,12 +1480,12 @@ xfs_swap_extent_rmap( offset_fsb += ilen; } - tip->i_d.di_flags2 = tip_flags2; + tip->i_diflags2 = tip_flags2; return 0; out: trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_); - tip->i_d.di_flags2 = tip_flags2; + tip->i_diflags2 = tip_flags2; return error; } @@ -1420,15 +1507,15 @@ xfs_swap_extent_forks( /* * Count the number of extended attribute blocks */ - if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && - (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { + if (xfs_inode_has_attr_fork(ip) && ip->i_af.if_nextents > 0 && + ip->i_af.if_format != XFS_DINODE_FMT_LOCAL) { error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk, &aforkblks); if (error) return error; } - if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && - (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { + if (xfs_inode_has_attr_fork(tip) && tip->i_af.if_nextents > 0 && + tip->i_af.if_format != XFS_DINODE_FMT_LOCAL) { error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk, &taforkblks); if (error) @@ -1442,12 +1529,12 @@ xfs_swap_extent_forks( * event of a crash. Set the owner change log flags now and leave the * bmbt scan as the last step. */ - if (ip->i_d.di_version == 3 && - ip->i_d.di_format == XFS_DINODE_FMT_BTREE) - (*target_log_flags) |= XFS_ILOG_DOWNER; - if (tip->i_d.di_version == 3 && - tip->i_d.di_format == XFS_DINODE_FMT_BTREE) - (*src_log_flags) |= XFS_ILOG_DOWNER; + if (xfs_has_v3inodes(ip->i_mount)) { + if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) + (*target_log_flags) |= XFS_ILOG_DOWNER; + if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE) + (*src_log_flags) |= XFS_ILOG_DOWNER; + } /* * Swap the data forks of the inodes @@ -1457,12 +1544,9 @@ xfs_swap_extent_forks( /* * Fix the on-disk inode values */ - tmp = (uint64_t)ip->i_d.di_nblocks; - ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; - tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; - - swap(ip->i_d.di_nextents, tip->i_d.di_nextents); - swap(ip->i_d.di_format, tip->i_d.di_format); + tmp = (uint64_t)ip->i_nblocks; + ip->i_nblocks = tip->i_nblocks - taforkblks + aforkblks; + tip->i_nblocks = tmp + taforkblks - aforkblks; /* * The extents in the source inode could still contain speculative @@ -1477,24 +1561,24 @@ xfs_swap_extent_forks( tip->i_delayed_blks = ip->i_delayed_blks; ip->i_delayed_blks = 0; - switch (ip->i_d.di_format) { + switch (ip->i_df.if_format) { case XFS_DINODE_FMT_EXTENTS: (*src_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: - ASSERT(ip->i_d.di_version < 3 || + ASSERT(!xfs_has_v3inodes(ip->i_mount) || (*src_log_flags & XFS_ILOG_DOWNER)); (*src_log_flags) |= XFS_ILOG_DBROOT; break; } - switch (tip->i_d.di_format) { + switch (tip->i_df.if_format) { case XFS_DINODE_FMT_EXTENTS: (*target_log_flags) |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: (*target_log_flags) |= XFS_ILOG_DBROOT; - ASSERT(tip->i_d.di_version < 3 || + ASSERT(!xfs_has_v3inodes(ip->i_mount) || (*target_log_flags & XFS_ILOG_DOWNER)); break; } @@ -1555,9 +1639,9 @@ xfs_swap_extents( struct xfs_bstat *sbp = &sxp->sx_stat; int src_log_flags, target_log_flags; int error = 0; - int lock_flags; uint64_t f; int resblks = 0; + unsigned int flags = 0; /* * Lock the inodes against other IO, page faults and truncate to @@ -1566,8 +1650,8 @@ xfs_swap_extents( * do the rest of the checks. */ lock_two_nondirectories(VFS_I(ip), VFS_I(tip)); - lock_flags = XFS_MMAPLOCK_EXCL; - xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL); + filemap_invalidate_lock_two(VFS_I(ip)->i_mapping, + VFS_I(tip)->i_mapping); /* Verify that both files have the same format */ if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { @@ -1599,7 +1683,7 @@ xfs_swap_extents( if (xfs_inode_has_cow_data(tip)) { error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true); if (error) - return error; + goto out_unlock; } /* @@ -1607,10 +1691,10 @@ xfs_swap_extents( * a block reservation because it's really just a remap operation * performed with log redo items! */ - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { - int w = XFS_DATA_FORK; - uint32_t ipnext = XFS_IFORK_NEXTENTS(ip, w); - uint32_t tipnext = XFS_IFORK_NEXTENTS(tip, w); + if (xfs_has_rmapbt(mp)) { + int w = XFS_DATA_FORK; + uint32_t ipnext = ip->i_df.if_nextents; + uint32_t tipnext = tip->i_df.if_nextents; /* * Conceptually this shouldn't affect the shape of either bmbt, @@ -1621,17 +1705,16 @@ xfs_swap_extents( resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w); /* - * Handle the corner case where either inode might straddle the - * btree format boundary. If so, the inode could bounce between - * btree <-> extent format on unmap -> remap cycles, freeing and - * allocating a bmapbt block each time. + * If either inode straddles a bmapbt block allocation boundary, + * the rmapbt algorithm triggers repeated allocs and frees as + * extents are remapped. This can exhaust the block reservation + * prematurely and cause shutdown. Return freed blocks to the + * transaction reservation to counter this behavior. */ - if (ipnext == (XFS_IFORK_MAXEXT(ip, w) + 1)) - resblks += XFS_IFORK_MAXEXT(ip, w); - if (tipnext == (XFS_IFORK_MAXEXT(tip, w) + 1)) - resblks += XFS_IFORK_MAXEXT(tip, w); + flags |= XFS_TRANS_RES_FDBLKS; } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, flags, + &tp); if (error) goto out_unlock; @@ -1640,15 +1723,14 @@ xfs_swap_extents( * or cancel will unlock the inodes from this point onwards. */ xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL); - lock_flags |= XFS_ILOCK_EXCL; xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, tip, 0); /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || - sxp->sx_length != ip->i_d.di_size || - sxp->sx_length != tip->i_d.di_size) { + sxp->sx_length != ip->i_disk_size || + sxp->sx_length != tip->i_disk_size) { error = -EFAULT; goto out_trans_cancel; } @@ -1690,7 +1772,7 @@ xfs_swap_extents( src_log_flags = XFS_ILOG_CORE; target_log_flags = XFS_ILOG_CORE; - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + if (xfs_has_rmapbt(mp)) error = xfs_swap_extent_rmap(&tp, ip, tip); else error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags, @@ -1699,21 +1781,22 @@ xfs_swap_extents( goto out_trans_cancel; /* Do we have to swap reflink flags? */ - if ((ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK) ^ - (tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)) { - f = ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; - ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; - ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; - tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; - tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK; + if ((ip->i_diflags2 & XFS_DIFLAG2_REFLINK) ^ + (tip->i_diflags2 & XFS_DIFLAG2_REFLINK)) { + f = ip->i_diflags2 & XFS_DIFLAG2_REFLINK; + ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; + ip->i_diflags2 |= tip->i_diflags2 & XFS_DIFLAG2_REFLINK; + tip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; + tip->i_diflags2 |= f & XFS_DIFLAG2_REFLINK; } /* Swap the cow forks. */ - if (xfs_sb_version_hasreflink(&mp->m_sb)) { - ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS); - ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS); + if (xfs_has_reflink(mp)) { + ASSERT(!ip->i_cowfp || + ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS); + ASSERT(!tip->i_cowfp || + tip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS); - swap(ip->i_cnextents, tip->i_cnextents); swap(ip->i_cowfp, tip->i_cowfp); if (ip->i_cowfp && ip->i_cowfp->if_bytes) @@ -1750,7 +1833,7 @@ xfs_swap_extents( * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); @@ -1758,13 +1841,16 @@ xfs_swap_extents( trace_xfs_swap_extent_after(ip, 0); trace_xfs_swap_extent_after(tip, 1); +out_unlock_ilock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(tip, XFS_ILOCK_EXCL); out_unlock: - xfs_iunlock(ip, lock_flags); - xfs_iunlock(tip, lock_flags); + filemap_invalidate_unlock_two(VFS_I(ip)->i_mapping, + VFS_I(tip)->i_mapping); unlock_two_nondirectories(VFS_I(ip), VFS_I(tip)); return error; out_trans_cancel: xfs_trans_cancel(tp); - goto out_unlock; + goto out_unlock_ilock; } |