diff options
Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r-- | fs/xfs/xfs_file.c | 276 |
1 files changed, 138 insertions, 138 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 52c91e143725..de5368c803f9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -38,6 +38,7 @@ #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_dinode.h" +#include "xfs_icache.h" #include <linux/aio.h> #include <linux/dcache.h> @@ -179,7 +180,7 @@ xfs_file_fsync( return error; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; xfs_iflags_clear(ip, XFS_ITRUNCATED); @@ -225,46 +226,40 @@ xfs_file_fsync( !log_flushed) xfs_blkdev_issue_flush(mp->m_ddev_targp); - return -error; + return error; } STATIC ssize_t -xfs_file_aio_read( +xfs_file_read_iter( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) + struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - size_t size = 0; + size_t size = iov_iter_count(to); ssize_t ret = 0; int ioflags = 0; xfs_fsize_t n; + loff_t pos = iocb->ki_pos; XFS_STATS_INC(xs_read_calls); - BUG_ON(iocb->ki_pos != pos); - if (unlikely(file->f_flags & O_DIRECT)) - ioflags |= IO_ISDIRECT; + ioflags |= XFS_IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; + ioflags |= XFS_IO_INVIS; - ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE); - if (ret < 0) - return ret; - - if (unlikely(ioflags & IO_ISDIRECT)) { + if (unlikely(ioflags & XFS_IO_ISDIRECT)) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((pos & target->bt_smask) || (size & target->bt_smask)) { + /* DIO must be aligned to device logical sector size */ + if ((pos | size) & target->bt_logical_sectormask) { if (pos == i_size_read(inode)) return 0; - return -XFS_ERROR(EINVAL); + return -EINVAL; } } @@ -289,26 +284,36 @@ xfs_file_aio_read( * proceeed concurrently without serialisation. */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) { + if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) { xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); if (inode->i_mapping->nrpages) { - ret = -filemap_write_and_wait_range( + ret = filemap_write_and_wait_range( VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + size - 1); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } - truncate_pagecache_range(VFS_I(ip), pos, -1); + + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + size - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } trace_xfs_file_read(ip, size, pos, ioflags); - ret = generic_file_aio_read(iocb, iovp, nr_segs, pos); + ret = generic_file_read_iter(iocb, to); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); @@ -331,7 +336,7 @@ xfs_file_splice_read( XFS_STATS_INC(xs_read_calls); if (infilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; + ioflags |= XFS_IO_INVIS; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; @@ -349,47 +354,6 @@ xfs_file_splice_read( } /* - * xfs_file_splice_write() does not use xfs_rw_ilock() because - * generic_file_splice_write() takes the i_mutex itself. This, in theory, - * couuld cause lock inversions between the aio_write path and the splice path - * if someone is doing concurrent splice(2) based writes and write(2) based - * writes to the same inode. The only real way to fix this is to re-implement - * the generic code here with correct locking orders. - */ -STATIC ssize_t -xfs_file_splice_write( - struct pipe_inode_info *pipe, - struct file *outfilp, - loff_t *ppos, - size_t count, - unsigned int flags) -{ - struct inode *inode = outfilp->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - int ioflags = 0; - ssize_t ret; - - XFS_STATS_INC(xs_write_calls); - - if (outfilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - trace_xfs_file_splice_write(ip, count, *ppos, ioflags); - - ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); - if (ret > 0) - XFS_STATS_ADD(xs_write_bytes, ret); - - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; -} - -/* * This routine is called to handle zeroing any space in the last block of the * file that is beyond the EOF. We do this since the size is being increased * without writing anything to that block and we don't want to read the @@ -571,7 +535,7 @@ restart: xfs_rw_ilock(ip, *iolock); goto restart; } - error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); + error = xfs_zero_eof(ip, *pos, i_size_read(inode)); if (error) return error; } @@ -624,10 +588,7 @@ restart: STATIC ssize_t xfs_file_dio_aio_write( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t ocount) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -635,15 +596,18 @@ xfs_file_dio_aio_write( struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; - size_t count = ocount; int unaligned_io = 0; int iolock; + size_t count = iov_iter_count(from); + loff_t pos = iocb->ki_pos; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((pos & target->bt_smask) || (count & target->bt_smask)) - return -XFS_ERROR(EINVAL); + /* DIO must be aligned to device logical sector size */ + if ((pos | count) & target->bt_logical_sectormask) + return -EINVAL; + /* "unaligned" here means not aligned to a filesystem block */ if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) unaligned_io = 1; @@ -674,13 +638,23 @@ xfs_file_dio_aio_write( ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); if (ret) goto out; + iov_iter_truncate(from, count); if (mapping->nrpages) { - ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - pos, -1); + ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + pos, pos + count - 1); if (ret) goto out; - truncate_pagecache_range(VFS_I(ip), pos, -1); + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + count - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } /* @@ -695,8 +669,7 @@ xfs_file_dio_aio_write( } trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_direct_write(iocb, iovp, - &nr_segs, pos, &iocb->ki_pos, count, ocount); + ret = generic_file_direct_write(iocb, from, pos); out: xfs_rw_iunlock(ip, iolock); @@ -709,10 +682,7 @@ out: STATIC ssize_t xfs_file_buffered_aio_write( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t ocount) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -721,7 +691,8 @@ xfs_file_buffered_aio_write( ssize_t ret; int enospc = 0; int iolock = XFS_IOLOCK_EXCL; - size_t count = ocount; + loff_t pos = iocb->ki_pos; + size_t count = iov_iter_count(from); xfs_rw_ilock(ip, iolock); @@ -729,22 +700,37 @@ xfs_file_buffered_aio_write( if (ret) goto out; + iov_iter_truncate(from, count); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; write_retry: trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_buffered_write(iocb, iovp, nr_segs, - pos, &iocb->ki_pos, count, 0); + ret = generic_perform_write(file, from, pos); + if (likely(ret >= 0)) + iocb->ki_pos = pos + ret; /* - * If we just got an ENOSPC, try to write back all dirty inodes to - * convert delalloc space to free up some of the excess reserved - * metadata space. + * If we hit a space limit, try to free up some lingering preallocated + * space before returning an error. In the case of ENOSPC, first try to + * write back all dirty inodes to free up some of the excess reserved + * metadata space. This reduces the chances that the eofblocks scan + * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this + * also behaves as a filter to prevent too many eofblocks scans from + * running at the same time. */ - if (ret == -ENOSPC && !enospc) { + if (ret == -EDQUOT && !enospc) { + enospc = xfs_inode_free_quota_eofblocks(ip); + if (enospc) + goto write_retry; + } else if (ret == -ENOSPC && !enospc) { + struct xfs_eofblocks eofb = {0}; + enospc = 1; xfs_flush_inodes(ip->i_mount); + eofb.eof_scan_owner = ip->i_ino; /* for locking */ + eofb.eof_flags = XFS_EOF_FLAGS_SYNC; + xfs_icache_free_eofblocks(ip->i_mount, &eofb); goto write_retry; } @@ -755,40 +741,29 @@ out: } STATIC ssize_t -xfs_file_aio_write( +xfs_file_write_iter( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); ssize_t ret; - size_t ocount = 0; + size_t ocount = iov_iter_count(from); XFS_STATS_INC(xs_write_calls); - BUG_ON(iocb->ki_pos != pos); - - ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); - if (ret) - return ret; - if (ocount == 0) return 0; - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - ret = -EIO; - goto out; - } + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; if (unlikely(file->f_flags & O_DIRECT)) - ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); + ret = xfs_file_dio_aio_write(iocb, from); else - ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, - ocount); + ret = xfs_file_buffered_aio_write(iocb, from); if (ret > 0) { ssize_t err; @@ -796,12 +771,10 @@ xfs_file_aio_write( XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } - -out: return ret; } @@ -820,7 +793,8 @@ xfs_file_fallocate( if (!S_ISREG(inode->i_mode)) return -EINVAL; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; xfs_ilock(ip, XFS_IOLOCK_EXCL); @@ -828,17 +802,42 @@ xfs_file_fallocate( error = xfs_free_file_space(ip, offset, len); if (error) goto out_unlock; + } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { + unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + + if (offset & blksize_mask || len & blksize_mask) { + error = -EINVAL; + goto out_unlock; + } + + /* + * There is no need to overlap collapse range with EOF, + * in which case it is effectively a truncate operation + */ + if (offset + len >= i_size_read(inode)) { + error = -EINVAL; + goto out_unlock; + } + + new_size = i_size_read(inode) - len; + + error = xfs_collapse_file_space(ip, offset, len); + if (error) + goto out_unlock; } else { if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) { new_size = offset + len; - error = -inode_newsize_ok(inode, new_size); + error = inode_newsize_ok(inode, new_size); if (error) goto out_unlock; } - error = xfs_alloc_file_space(ip, offset, len, - XFS_BMAPI_PREALLOC); + if (mode & FALLOC_FL_ZERO_RANGE) + error = xfs_zero_file_space(ip, offset, len); + else + error = xfs_alloc_file_space(ip, offset, len, + XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } @@ -856,7 +855,7 @@ xfs_file_fallocate( if (ip->i_d.di_mode & S_IXGRP) ip->i_d.di_mode &= ~S_ISGID; - if (!(mode & FALLOC_FL_PUNCH_HOLE)) + if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -879,7 +878,7 @@ xfs_file_fallocate( out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return -error; + return error; } @@ -912,9 +911,9 @@ xfs_dir_open( * If there are any blocks, read-ahead block 0 as we're almost * certain to have the next operation be a read there. */ - mode = xfs_ilock_map_shared(ip); + mode = xfs_ilock_data_map_shared(ip); if (ip->i_d.di_nextents > 0) - xfs_dir3_data_readahead(NULL, ip, 0, -1); + xfs_dir3_data_readahead(ip, 0, -1); xfs_iunlock(ip, mode); return 0; } @@ -924,7 +923,7 @@ xfs_file_release( struct inode *inode, struct file *filp) { - return -xfs_release(XFS_I(inode)); + return xfs_release(XFS_I(inode)); } STATIC int @@ -953,7 +952,7 @@ xfs_file_readdir( error = xfs_readdir(ip, ctx, bufsize); if (error) - return -error; + return error; return 0; } @@ -1215,11 +1214,11 @@ xfs_seek_data( uint lock; int error; - lock = xfs_ilock_map_shared(ip); + lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1241,7 +1240,7 @@ xfs_seek_data( /* No extents at given offset, must be beyond EOF */ if (nmap == 0) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1272,7 +1271,7 @@ xfs_seek_data( * we are reading after EOF if nothing in map[1]. */ if (nmap == 1) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1285,7 +1284,7 @@ xfs_seek_data( fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; start = XFS_FSB_TO_B(mp, fsbno); if (start >= isize) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } } @@ -1294,10 +1293,10 @@ out: offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: - xfs_iunlock_map_shared(ip, lock); + xfs_iunlock(ip, lock); if (error) - return -error; + return error; return offset; } @@ -1317,13 +1316,13 @@ xfs_seek_hole( int error; if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; - lock = xfs_ilock_map_shared(ip); + lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1342,7 +1341,7 @@ xfs_seek_hole( /* No extents at given offset, must be beyond EOF */ if (nmap == 0) { - error = ENXIO; + error = -ENXIO; goto out_unlock; } @@ -1402,10 +1401,10 @@ out: offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: - xfs_iunlock_map_shared(ip, lock); + xfs_iunlock(ip, lock); if (error) - return -error; + return error; return offset; } @@ -1431,12 +1430,12 @@ xfs_file_llseek( const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = xfs_file_aio_read, - .aio_write = xfs_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = xfs_file_read_iter, + .write_iter = xfs_file_write_iter, .splice_read = xfs_file_splice_read, - .splice_write = xfs_file_splice_write, + .splice_write = iter_file_splice_write, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, @@ -1462,6 +1461,7 @@ const struct file_operations xfs_dir_file_operations = { static const struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = xfs_vm_page_mkwrite, .remap_pages = generic_file_remap_pages, }; |