aboutsummaryrefslogtreecommitdiffstats
path: root/fs/f2fs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs/file.c')
-rw-r--r--fs/f2fs/file.c509
1 files changed, 415 insertions, 94 deletions
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 92ec2699bc85..3c98ef6af97d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -24,6 +24,7 @@
#include <linux/sched/signal.h>
#include <linux/fileattr.h>
#include <linux/fadvise.h>
+#include <linux/iomap.h>
#include "f2fs.h"
#include "node.h"
@@ -1232,7 +1233,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
if (ret)
return ret;
- ret = f2fs_get_node_info(sbi, dn.nid, &ni);
+ ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
if (ret) {
f2fs_put_dnode(&dn);
return ret;
@@ -1687,6 +1688,7 @@ next_alloc:
map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+ file_dont_truncate(inode);
up_write(&sbi->pin_sem);
@@ -1748,7 +1750,11 @@ static long f2fs_fallocate(struct file *file, int mode,
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
return -EOPNOTSUPP;
- if (f2fs_compressed_file(inode) &&
+ /*
+ * Pinned file should not support partial trucation since the block
+ * can be used by applications.
+ */
+ if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)))
return -EOPNOTSUPP;
@@ -3143,17 +3149,17 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
inode_lock(inode);
- if (f2fs_should_update_outplace(inode, NULL)) {
- ret = -EINVAL;
- goto out;
- }
-
if (!pin) {
clear_inode_flag(inode, FI_PIN_FILE);
f2fs_i_gc_failures_write(inode, 0);
goto done;
}
+ if (f2fs_should_update_outplace(inode, NULL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
if (f2fs_pin_file_control(inode, false)) {
ret = -EAGAIN;
goto out;
@@ -4218,27 +4224,385 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return __f2fs_ioctl(filp, cmd, arg);
}
-static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+/*
+ * Return %true if the given read or write request should use direct I/O, or
+ * %false if it should use buffered I/O.
+ */
+static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ unsigned int align;
+
+ if (!(iocb->ki_flags & IOCB_DIRECT))
+ return false;
+
+ if (f2fs_force_buffered_io(inode, iocb, iter))
+ return false;
+
+ /*
+ * Direct I/O not aligned to the disk's logical_block_size will be
+ * attempted, but will fail with -EINVAL.
+ *
+ * f2fs additionally requires that direct I/O be aligned to the
+ * filesystem block size, which is often a stricter requirement.
+ * However, f2fs traditionally falls back to buffered I/O on requests
+ * that are logical_block_size-aligned but not fs-block aligned.
+ *
+ * The below logic implements this behavior.
+ */
+ align = iocb->ki_pos | iov_iter_alignment(iter);
+ if (!IS_ALIGNED(align, i_blocksize(inode)) &&
+ IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
+ return false;
+
+ return true;
+}
+
+static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
+ unsigned int flags)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
+
+ dec_page_count(sbi, F2FS_DIO_READ);
+ if (error)
+ return error;
+ f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size);
+ return 0;
+}
+
+static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
+ .end_io = f2fs_dio_read_end_io,
+};
+
+static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- int ret;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ const loff_t pos = iocb->ki_pos;
+ const size_t count = iov_iter_count(to);
+ struct iomap_dio *dio;
+ ssize_t ret;
+
+ if (count == 0)
+ return 0; /* skip atime update */
+
+ trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ } else {
+ down_read(&fi->i_gc_rwsem[READ]);
+ }
+
+ /*
+ * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
+ * the higher-level function iomap_dio_rw() in order to ensure that the
+ * F2FS_DIO_READ counter will be decremented correctly in all cases.
+ */
+ inc_page_count(sbi, F2FS_DIO_READ);
+ dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
+ &f2fs_iomap_dio_read_ops, 0, 0);
+ if (IS_ERR_OR_NULL(dio)) {
+ ret = PTR_ERR_OR_ZERO(dio);
+ if (ret != -EIOCBQUEUED)
+ dec_page_count(sbi, F2FS_DIO_READ);
+ } else {
+ ret = iomap_dio_complete(dio);
+ }
+
+ up_read(&fi->i_gc_rwsem[READ]);
+
+ file_accessed(file);
+out:
+ trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
+ return ret;
+}
+
+static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
if (!f2fs_is_compress_backend_ready(inode))
return -EOPNOTSUPP;
- ret = generic_file_read_iter(iocb, iter);
+ if (f2fs_should_use_dio(inode, iocb, to))
+ return f2fs_dio_read_iter(iocb, to);
+ ret = filemap_read(iocb, to, 0);
if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret);
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
+ return ret;
+}
+
+static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ ssize_t count;
+ int err;
+
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
+ if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
+ return -EPERM;
+
+ count = generic_write_checks(iocb, from);
+ if (count <= 0)
+ return count;
+
+ err = file_modified(file);
+ if (err)
+ return err;
+ return count;
+}
+
+/*
+ * Preallocate blocks for a write request, if it is possible and helpful to do
+ * so. Returns a positive number if blocks may have been preallocated, 0 if no
+ * blocks were preallocated, or a negative errno value if something went
+ * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
+ * requested blocks (not just some of them) have been allocated.
+ */
+static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
+ bool dio)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ const loff_t pos = iocb->ki_pos;
+ const size_t count = iov_iter_count(iter);
+ struct f2fs_map_blocks map = {};
+ int flag;
+ int ret;
+
+ /* If it will be an out-of-place direct write, don't bother. */
+ if (dio && f2fs_lfs_mode(sbi))
+ return 0;
+ /*
+ * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
+ * buffered IO, if DIO meets any holes.
+ */
+ if (dio && i_size_read(inode) &&
+ (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
+ return 0;
+
+ /* No-wait I/O can't allocate blocks. */
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return 0;
+
+ /* If it will be a short write, don't bother. */
+ if (fault_in_iov_iter_readable(iter, count))
+ return 0;
+
+ if (f2fs_has_inline_data(inode)) {
+ /* If the data will fit inline, don't bother. */
+ if (pos + count <= MAX_INLINE_DATA(inode))
+ return 0;
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ }
+
+ /* Do not preallocate blocks that will be written partially in 4KB. */
+ map.m_lblk = F2FS_BLK_ALIGN(pos);
+ map.m_len = F2FS_BYTES_TO_BLK(pos + count);
+ if (map.m_len > map.m_lblk)
+ map.m_len -= map.m_lblk;
+ else
+ map.m_len = 0;
+ map.m_may_create = true;
+ if (dio) {
+ map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+ flag = F2FS_GET_BLOCK_PRE_DIO;
+ } else {
+ map.m_seg_type = NO_CHECK_TYPE;
+ flag = F2FS_GET_BLOCK_PRE_AIO;
+ }
+
+ ret = f2fs_map_blocks(inode, &map, 1, flag);
+ /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
+ if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
+ return ret;
+ if (ret == 0)
+ set_inode_flag(inode, FI_PREALLOCATED_ALL);
+ return map.m_len;
+}
+
+static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
+ struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ ssize_t ret;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
+ current->backing_dev_info = inode_to_bdi(inode);
+ ret = generic_perform_write(file, from, iocb->ki_pos);
+ current->backing_dev_info = NULL;
+
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret);
+ }
return ret;
}
-static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
+ unsigned int flags)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
+
+ dec_page_count(sbi, F2FS_DIO_WRITE);
+ if (error)
+ return error;
+ f2fs_update_iostat(sbi, APP_DIRECT_IO, size);
+ return 0;
+}
+
+static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
+ .end_io = f2fs_dio_write_end_io,
+};
+
+static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
+ bool *may_need_sync)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ const bool do_opu = f2fs_lfs_mode(sbi);
+ const int whint_mode = F2FS_OPTION(sbi).whint_mode;
+ const loff_t pos = iocb->ki_pos;
+ const ssize_t count = iov_iter_count(from);
+ const enum rw_hint hint = iocb->ki_hint;
+ unsigned int dio_flags;
+ struct iomap_dio *dio;
+ ssize_t ret;
+
+ trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* f2fs_convert_inline_inode() and block allocation can block */
+ if (f2fs_has_inline_data(inode) ||
+ !f2fs_overwrite_io(inode, pos, count)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ up_read(&fi->i_gc_rwsem[WRITE]);
+ ret = -EAGAIN;
+ goto out;
+ }
+ } else {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ goto out;
+
+ down_read(&fi->i_gc_rwsem[WRITE]);
+ if (do_opu)
+ down_read(&fi->i_gc_rwsem[READ]);
+ }
+ if (whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = WRITE_LIFE_NOT_SET;
+
+ /*
+ * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
+ * the higher-level function iomap_dio_rw() in order to ensure that the
+ * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
+ */
+ inc_page_count(sbi, F2FS_DIO_WRITE);
+ dio_flags = 0;
+ if (pos + count > inode->i_size)
+ dio_flags |= IOMAP_DIO_FORCE_WAIT;
+ dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
+ &f2fs_iomap_dio_write_ops, dio_flags, 0);
+ if (IS_ERR_OR_NULL(dio)) {
+ ret = PTR_ERR_OR_ZERO(dio);
+ if (ret == -ENOTBLK)
+ ret = 0;
+ if (ret != -EIOCBQUEUED)
+ dec_page_count(sbi, F2FS_DIO_WRITE);
+ } else {
+ ret = iomap_dio_complete(dio);
+ }
+
+ if (whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = hint;
+ if (do_opu)
+ up_read(&fi->i_gc_rwsem[READ]);
+ up_read(&fi->i_gc_rwsem[WRITE]);
+
+ if (ret < 0)
+ goto out;
+ if (pos + ret > inode->i_size)
+ f2fs_i_size_write(inode, pos + ret);
+ if (!do_opu)
+ set_inode_flag(inode, FI_UPDATE_WRITE);
+
+ if (iov_iter_count(from)) {
+ ssize_t ret2;
+ loff_t bufio_start_pos = iocb->ki_pos;
+
+ /*
+ * The direct write was partial, so we need to fall back to a
+ * buffered write for the remainder.
+ */
+
+ ret2 = f2fs_buffered_write_iter(iocb, from);
+ if (iov_iter_count(from))
+ f2fs_write_failed(inode, iocb->ki_pos);
+ if (ret2 < 0)
+ goto out;
+
+ /*
+ * Ensure that the pagecache pages are written to disk and
+ * invalidated to preserve the expected O_DIRECT semantics.
+ */
+ if (ret2 > 0) {
+ loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
+
+ ret += ret2;
+
+ ret2 = filemap_write_and_wait_range(file->f_mapping,
+ bufio_start_pos,
+ bufio_end_pos);
+ if (ret2 < 0)
+ goto out;
+ invalidate_mapping_pages(file->f_mapping,
+ bufio_start_pos >> PAGE_SHIFT,
+ bufio_end_pos >> PAGE_SHIFT);
+ }
+ } else {
+ /* iomap_dio_rw() already handled the generic_write_sync(). */
+ *may_need_sync = false;
+ }
+out:
+ trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
+ return ret;
+}
+
+static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ const loff_t orig_pos = iocb->ki_pos;
+ const size_t orig_count = iov_iter_count(from);
+ loff_t target_size;
+ bool dio;
+ bool may_need_sync = true;
+ int preallocated;
ssize_t ret;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
@@ -4260,91 +4624,42 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
}
- if (unlikely(IS_IMMUTABLE(inode))) {
- ret = -EPERM;
- goto unlock;
- }
-
- if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
- ret = -EPERM;
- goto unlock;
- }
-
- ret = generic_write_checks(iocb, from);
- if (ret > 0) {
- bool preallocated = false;
- size_t target_size = 0;
- int err;
-
- if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
- set_inode_flag(inode, FI_NO_PREALLOC);
-
- if ((iocb->ki_flags & IOCB_NOWAIT)) {
- if (!f2fs_overwrite_io(inode, iocb->ki_pos,
- iov_iter_count(from)) ||
- f2fs_has_inline_data(inode) ||
- f2fs_force_buffered_io(inode, iocb, from)) {
- clear_inode_flag(inode, FI_NO_PREALLOC);
- inode_unlock(inode);
- ret = -EAGAIN;
- goto out;
- }
- goto write;
- }
-
- if (is_inode_flag_set(inode, FI_NO_PREALLOC))
- goto write;
-
- if (iocb->ki_flags & IOCB_DIRECT) {
- /*
- * Convert inline data for Direct I/O before entering
- * f2fs_direct_IO().
- */
- err = f2fs_convert_inline_inode(inode);
- if (err)
- goto out_err;
- /*
- * If force_buffere_io() is true, we have to allocate
- * blocks all the time, since f2fs_direct_IO will fall
- * back to buffered IO.
- */
- if (!f2fs_force_buffered_io(inode, iocb, from) &&
- f2fs_lfs_mode(F2FS_I_SB(inode)))
- goto write;
- }
- preallocated = true;
- target_size = iocb->ki_pos + iov_iter_count(from);
+ ret = f2fs_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out_unlock;
- err = f2fs_preallocate_blocks(iocb, from);
- if (err) {
-out_err:
- clear_inode_flag(inode, FI_NO_PREALLOC);
- inode_unlock(inode);
- ret = err;
- goto out;
- }
-write:
- ret = __generic_file_write_iter(iocb, from);
- clear_inode_flag(inode, FI_NO_PREALLOC);
+ /* Determine whether we will do a direct write or a buffered write. */
+ dio = f2fs_should_use_dio(inode, iocb, from);
- /* if we couldn't write data, we should deallocate blocks. */
- if (preallocated && i_size_read(inode) < target_size) {
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_truncate(inode);
- filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- }
+ /* Possibly preallocate the blocks for the write. */
+ target_size = iocb->ki_pos + iov_iter_count(from);
+ preallocated = f2fs_preallocate_blocks(iocb, from, dio);
+ if (preallocated < 0)
+ ret = preallocated;
+ else
+ /* Do the actual write. */
+ ret = dio ?
+ f2fs_dio_write_iter(iocb, from, &may_need_sync):
+ f2fs_buffered_write_iter(iocb, from);
- if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
+ /* Don't leave any preallocated blocks around past i_size. */
+ if (preallocated && i_size_read(inode) < target_size) {
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ filemap_invalidate_lock(inode->i_mapping);
+ if (!f2fs_truncate(inode))
+ file_dont_truncate(inode);
+ filemap_invalidate_unlock(inode->i_mapping);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ } else {
+ file_dont_truncate(inode);
}
-unlock:
+
+ clear_inode_flag(inode, FI_PREALLOCATED_ALL);
+out_unlock:
inode_unlock(inode);
out:
- trace_f2fs_file_write_iter(inode, iocb->ki_pos,
- iov_iter_count(from), ret);
- if (ret > 0)
+ trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
+ if (ret > 0 && may_need_sync)
ret = generic_write_sync(iocb, ret);
return ret;
}
@@ -4352,12 +4667,12 @@ out:
static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
int advice)
{
- struct inode *inode;
struct address_space *mapping;
struct backing_dev_info *bdi;
+ struct inode *inode = file_inode(filp);
+ int err;
if (advice == POSIX_FADV_SEQUENTIAL) {
- inode = file_inode(filp);
if (S_ISFIFO(inode->i_mode))
return -ESPIPE;
@@ -4374,7 +4689,13 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
return 0;
}
- return generic_fadvise(filp, offset, len, advice);
+ err = generic_fadvise(filp, offset, len, advice);
+ if (!err && advice == POSIX_FADV_DONTNEED &&
+ test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
+ f2fs_compressed_file(inode))
+ f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
+
+ return err;
}
#ifdef CONFIG_COMPAT