about summary refs log tree commit diff stats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- fs/ext4/inode.c 367
1 files changed, 248 insertions, 119 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9c064727ed62..88d57af1b516 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,6 +37,7 @@
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/bitops.h>
+#include <linux/iomap.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -71,10 +72,9 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
csum_size);
offset += csum_size;
- csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
- EXT4_INODE_SIZE(inode->i_sb) -
- offset);
}
+ csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
+ EXT4_INODE_SIZE(inode->i_sb) - offset);
}
return csum;
@@ -261,8 +261,15 @@ void ext4_evict_inode(struct inode *inode)
"couldn't mark inode dirty (err %d)", err);
goto stop_handle;
}
- if (inode->i_blocks)
- ext4_truncate(inode);
+ if (inode->i_blocks) {
+ err = ext4_truncate(inode);
+ if (err) {
+ ext4_error(inode->i_sb,
+ "couldn't truncate inode %lu (err %d)",
+ inode->i_ino, err);
+ goto stop_handle;
+ }
+ }
/*
* ext4_ext_truncate() doesn't reserve any slop when it
@@ -654,12 +661,8 @@ found:
if (flags & EXT4_GET_BLOCKS_ZERO &&
map->m_flags & EXT4_MAP_MAPPED &&
map->m_flags & EXT4_MAP_NEW) {
- ext4_lblk_t i;
-
- for (i = 0; i < map->m_len; i++) {
- unmap_underlying_metadata(inode->i_sb->s_bdev,
- map->m_pblk + i);
- }
+ clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
+ map->m_len);
ret = ext4_issue_zeroout(inode, map->m_lblk,
map->m_pblk, map->m_len);
if (ret) {
@@ -767,6 +770,9 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
ext4_update_bh_state(bh, map.m_flags);
bh->b_size = inode->i_sb->s_blocksize * map.m_len;
ret = 0;
+ } else if (ret == 0) {
+ /* hole case, need to fill in bh->b_size */
+ bh->b_size = inode->i_sb->s_blocksize * map.m_len;
}
return ret;
}
@@ -1127,8 +1133,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
if (err)
break;
if (buffer_new(bh)) {
- unmap_underlying_metadata(bh->b_bdev,
- bh->b_blocknr);
+ clean_bdev_bh_alias(bh);
if (PageUptodate(page)) {
clear_buffer_new(bh);
set_buffer_uptodate(bh);
@@ -1166,7 +1171,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
if (unlikely(err))
page_zero_new_buffers(page, from, to);
else if (decrypt)
- err = fscrypt_decrypt_page(page);
+ err = fscrypt_decrypt_page(page->mapping->host, page,
+ PAGE_SIZE, 0, page->index);
return err;
}
#endif
@@ -2360,11 +2366,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
BUG_ON(map->m_len == 0);
if (map->m_flags & EXT4_MAP_NEW) {
- struct block_device *bdev = inode->i_sb->s_bdev;
- int i;
-
- for (i = 0; i < map->m_len; i++)
- unmap_underlying_metadata(bdev, map->m_pblk + i);
+ clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
+ map->m_len);
}
return 0;
}
@@ -2891,7 +2894,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
index = pos >> PAGE_SHIFT;
- if (ext4_nonda_switch(inode->i_sb)) {
+ if (ext4_nonda_switch(inode->i_sb) ||
+ S_ISLNK(inode->i_mode)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
return ext4_write_begin(file, mapping, pos,
len, flags, pagep, fsdata);
@@ -3268,53 +3272,159 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
}
#ifdef CONFIG_FS_DAX
-/*
- * Get block function for DAX IO and mmap faults. It takes care of converting
- * unwritten extents to written ones and initializes new / converted blocks
- * to zeros.
- */
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned flags, struct iomap *iomap)
{
+ unsigned int blkbits = inode->i_blkbits;
+ unsigned long first_block = offset >> blkbits;
+ unsigned long last_block = (offset + length - 1) >> blkbits;
+ struct ext4_map_blocks map;
int ret;
- ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create);
- if (!create)
- return _ext4_get_block(inode, iblock, bh_result, 0);
+ if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
+ return -ERANGE;
- ret = ext4_get_block_trans(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_PRE_IO |
- EXT4_GET_BLOCKS_CREATE_ZERO);
- if (ret < 0)
- return ret;
+ map.m_lblk = first_block;
+ map.m_len = last_block - first_block + 1;
- if (buffer_unwritten(bh_result)) {
+ if (!(flags & IOMAP_WRITE)) {
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ } else {
+ int dio_credits;
+ handle_t *handle;
+ int retries = 0;
+
+ /* Trim mapping request to maximum we can map at once for DIO */
+ if (map.m_len > DIO_MAX_BLOCKS)
+ map.m_len = DIO_MAX_BLOCKS;
+ dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
+retry:
/*
- * We are protected by i_mmap_sem or i_mutex so we know block
- * cannot go away from under us even though we dropped
- * i_data_sem. Convert extent to written and write zeros there.
+ * Either we allocate blocks and then we don't get unwritten
+ * extent so we have reserved enough credits, or the blocks
+ * are already allocated and unwritten and in that case
+ * extent conversion fits in the credits as well.
*/
- ret = ext4_get_block_trans(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_CONVERT |
- EXT4_GET_BLOCKS_CREATE_ZERO);
- if (ret < 0)
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+ dio_credits);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ret = ext4_map_blocks(handle, inode, &map,
+ EXT4_GET_BLOCKS_CREATE_ZERO);
+ if (ret < 0) {
+ ext4_journal_stop(handle);
+ if (ret == -ENOSPC &&
+ ext4_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
return ret;
+ }
+
+ /*
+ * If we added blocks beyond i_size, we need to make sure they
+ * will get truncated if we crash before updating i_size in
+ * ext4_iomap_end(). For faults we don't need to do that (and
+ * even cannot because for orphan list operations inode_lock is
+ * required) - if we happen to instantiate block beyond i_size,
+ * it is because we race with truncate which has already added
+ * the inode to the orphan list.
+ */
+ if (!(flags & IOMAP_FAULT) && first_block + map.m_len >
+ (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) {
+ int err;
+
+ err = ext4_orphan_add(handle, inode);
+ if (err < 0) {
+ ext4_journal_stop(handle);
+ return err;
+ }
+ }
+ ext4_journal_stop(handle);
}
- /*
- * At least for now we have to clear BH_New so that DAX code
- * doesn't attempt to zero blocks again in a racy way.
- */
- clear_buffer_new(bh_result);
+
+ iomap->flags = 0;
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->offset = (u64)first_block << blkbits; /* avoid 32-bit wrap */
+
+ if (ret == 0) {
+ iomap->type = IOMAP_HOLE;
+ iomap->blkno = IOMAP_NULL_BLOCK;
+ iomap->length = (u64)map.m_len << blkbits;
+ } else {
+ if (map.m_flags & EXT4_MAP_MAPPED) {
+ iomap->type = IOMAP_MAPPED;
+ } else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+ iomap->type = IOMAP_UNWRITTEN;
+ } else {
+ WARN_ON_ONCE(1);
+ return -EIO;
+ }
+ iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
+ iomap->length = (u64)map.m_len << blkbits;
+ }
+
+ if (map.m_flags & EXT4_MAP_NEW)
+ iomap->flags |= IOMAP_F_NEW;
return 0;
}
-#else
-/* Just define empty function, it will never get called. */
-int ext4_dax_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+
+static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+ ssize_t written, unsigned flags, struct iomap *iomap)
{
- BUG();
- return 0;
+ int ret = 0;
+ handle_t *handle;
+ int blkbits = inode->i_blkbits;
+ bool truncate = false;
+
+ if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
+ return 0; /* only non-fault writes need i_size/orphan work */
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto orphan_del;
+ }
+ if (ext4_update_inode_size(inode, offset + written))
+ ext4_mark_inode_dirty(handle, inode);
+ /*
+ * We may need to truncate allocated but not written blocks beyond EOF.
+ */
+ if (iomap->offset + iomap->length >
+ ALIGN(inode->i_size, 1 << blkbits)) {
+ ext4_lblk_t written_blk, end_blk;
+
+ written_blk = (offset + written) >> blkbits;
+ end_blk = (offset + length) >> blkbits;
+ if (written_blk < end_blk && ext4_can_truncate(inode))
+ truncate = true;
+ }
+ /*
+ * Remove inode from orphan list if we were extending an inode and
+ * everything went fine.
+ */
+ if (!truncate && inode->i_nlink &&
+ !list_empty(&EXT4_I(inode)->i_orphan))
+ ext4_orphan_del(handle, inode);
+ ext4_journal_stop(handle);
+ if (truncate) {
+ ext4_truncate_failed_write(inode);
+orphan_del:
+ /*
+ * If truncate failed early the inode might still be on the
+ * orphan list; we need to make sure the inode is removed from
+ * the orphan list in that case.
+ */
+ if (inode->i_nlink)
+ ext4_orphan_del(NULL, inode);
+ }
+ return ret;
}
+
+struct iomap_ops ext4_iomap_ops = {
+ .iomap_begin = ext4_iomap_begin,
+ .iomap_end = ext4_iomap_end,
+};
+
#endif
static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
@@ -3436,19 +3546,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
iocb->private = NULL;
if (overwrite)
get_block_func = ext4_dio_get_block_overwrite;
- else if (IS_DAX(inode)) {
- /*
- * We can avoid zeroing for aligned DAX writes beyond EOF. Other
- * writes need zeroing either because they can race with page
- * faults or because they use partial blocks.
- */
- if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
- ext4_aligned_io(inode, offset, count))
- get_block_func = ext4_dio_get_block;
- else
- get_block_func = ext4_dax_get_block;
- dio_flags = DIO_LOCKING;
- } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+ else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
get_block_func = ext4_dio_get_block;
dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
@@ -3462,14 +3560,9 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
#ifdef CONFIG_EXT4_FS_ENCRYPTION
BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
#endif
- if (IS_DAX(inode)) {
- ret = dax_do_io(iocb, inode, iter, get_block_func,
- ext4_end_io_dio, dio_flags);
- } else
- ret = __blockdev_direct_IO(iocb, inode,
- inode->i_sb->s_bdev, iter,
- get_block_func,
- ext4_end_io_dio, NULL, dio_flags);
+ ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
+ get_block_func, ext4_end_io_dio, NULL,
+ dio_flags);
if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN)) {
@@ -3538,6 +3631,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
+ size_t count = iov_iter_count(iter);
ssize_t ret;
/*
@@ -3546,19 +3640,12 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
* we are protected against page writeback as well.
*/
inode_lock_shared(inode);
- if (IS_DAX(inode)) {
- ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
- } else {
- size_t count = iov_iter_count(iter);
-
- ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
- iocb->ki_pos + count);
- if (ret)
- goto out_unlock;
- ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
- iter, ext4_dio_get_block,
- NULL, NULL, 0);
- }
+ ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
+ iocb->ki_pos + count);
+ if (ret)
+ goto out_unlock;
+ ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+ iter, ext4_dio_get_block, NULL, NULL, 0);
out_unlock:
inode_unlock_shared(inode);
return ret;
@@ -3587,6 +3674,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (ext4_has_inline_data(inode))
return 0;
+ /* DAX uses iomap path now */
+ if (WARN_ON_ONCE(IS_DAX(inode)))
+ return 0;
+
trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
if (iov_iter_rw(iter) == READ)
ret = ext4_direct_IO_read(iocb, iter);
@@ -3615,6 +3706,13 @@ static int ext4_journalled_set_page_dirty(struct page *page)
return __set_page_dirty_nobuffers(page);
}
+static int ext4_set_page_dirty(struct page *page)
+{
+ WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page));
+ WARN_ON_ONCE(!page_has_buffers(page));
+ return __set_page_dirty_buffers(page);
+}
+
static const struct address_space_operations ext4_aops = {
.readpage = ext4_readpage,
.readpages = ext4_readpages,
@@ -3622,6 +3720,7 @@ static const struct address_space_operations ext4_aops = {
.writepages = ext4_writepages,
.write_begin = ext4_write_begin,
.write_end = ext4_write_end,
+ .set_page_dirty = ext4_set_page_dirty,
.bmap = ext4_bmap,
.invalidatepage = ext4_invalidatepage,
.releasepage = ext4_releasepage,
@@ -3654,6 +3753,7 @@ static const struct address_space_operations ext4_da_aops = {
.writepages = ext4_writepages,
.write_begin = ext4_da_write_begin,
.write_end = ext4_da_write_end,
+ .set_page_dirty = ext4_set_page_dirty,
.bmap = ext4_bmap,
.invalidatepage = ext4_da_invalidatepage,
.releasepage = ext4_releasepage,
@@ -3743,7 +3843,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
/* We expect the key to be set. */
BUG_ON(!fscrypt_has_encryption_key(inode));
BUG_ON(blocksize != PAGE_SIZE);
- WARN_ON_ONCE(fscrypt_decrypt_page(page));
+ WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
+ page, PAGE_SIZE, 0, page->index));
}
}
if (ext4_should_journal_data(inode)) {
@@ -3792,8 +3893,10 @@ static int ext4_block_zero_page_range(handle_t *handle,
if (length > max || length < 0)
length = max;
- if (IS_DAX(inode))
- return dax_zero_page_range(inode, from, length, ext4_get_block);
+ if (IS_DAX(inode)) {
+ return iomap_zero_range(inode, from, length, NULL,
+ &ext4_iomap_ops);
+ }
return __ext4_block_zero_page_range(handle, mapping, from, length);
}
@@ -4026,7 +4129,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
if (IS_SYNC(inode))
ext4_handle_sync(handle);
- inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ inode->i_mtime = inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
out_stop:
ext4_journal_stop(handle);
@@ -4091,10 +4194,11 @@ int ext4_inode_attach_jinode(struct inode *inode)
* that's fine - as long as they are linked from the inode, the post-crash
* ext4_truncate() run will find them and release them.
*/
-void ext4_truncate(struct inode *inode)
+int ext4_truncate(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int credits;
+ int err = 0;
handle_t *handle;
struct address_space *mapping = inode->i_mapping;
@@ -4108,7 +4212,7 @@ void ext4_truncate(struct inode *inode)
trace_ext4_truncate_enter(inode);
if (!ext4_can_truncate(inode))
- return;
+ return 0;
ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
@@ -4120,13 +4224,13 @@ void ext4_truncate(struct inode *inode)
ext4_inline_data_truncate(inode, &has_inline);
if (has_inline)
- return;
+ return 0;
}
/* If we zero-out tail of the page, we have to create jinode for jbd2 */
if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
if (ext4_inode_attach_jinode(inode) < 0)
- return;
+ return 0;
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4135,10 +4239,8 @@ void ext4_truncate(struct inode *inode)
credits = ext4_blocks_for_truncate(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
- if (IS_ERR(handle)) {
- ext4_std_error(inode->i_sb, PTR_ERR(handle));
- return;
- }
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
if (inode->i_size & (inode->i_sb->s_blocksize - 1))
ext4_block_truncate_page(handle, mapping, inode->i_size);
@@ -4152,7 +4254,8 @@ void ext4_truncate(struct inode *inode)
* Implication: the file must always be in a sane, consistent
* truncatable state while each transaction commits.
*/
- if (ext4_orphan_add(handle, inode))
+ err = ext4_orphan_add(handle, inode);
+ if (err)
goto out_stop;
down_write(&EXT4_I(inode)->i_data_sem);
@@ -4160,11 +4263,13 @@ void ext4_truncate(struct inode *inode)
ext4_discard_preallocations(inode);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- ext4_ext_truncate(handle, inode);
+ err = ext4_ext_truncate(handle, inode);
else
ext4_ind_truncate(handle, inode);
up_write(&ei->i_data_sem);
+ if (err)
+ goto out_stop;
if (IS_SYNC(inode))
ext4_handle_sync(handle);
@@ -4180,11 +4285,12 @@ out_stop:
if (inode->i_nlink)
ext4_orphan_del(handle, inode);
- inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ inode->i_mtime = inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
trace_ext4_truncate_exit(inode);
+ return err;
}
/*
@@ -4352,7 +4458,9 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
+ if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) &&
+ !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
+ !ext4_encrypted_inode(inode))
new_fl |= S_DAX;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
@@ -4411,7 +4519,9 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
{
__le32 *magic = (void *)raw_inode +
EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
- if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
+ if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
+ EXT4_INODE_SIZE(inode->i_sb) &&
+ *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
ext4_set_inode_state(inode, EXT4_STATE_XATTR);
ext4_find_inline_data_nolock(inode);
} else
@@ -4434,6 +4544,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
struct inode *inode;
journal_t *journal = EXT4_SB(sb)->s_journal;
long ret;
+ loff_t size;
int block;
uid_t i_uid;
gid_t i_gid;
@@ -4456,10 +4567,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
- EXT4_INODE_SIZE(inode->i_sb)) {
- EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)",
- EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize,
- EXT4_INODE_SIZE(inode->i_sb));
+ EXT4_INODE_SIZE(inode->i_sb) ||
+ (ei->i_extra_isize & 3)) {
+ EXT4_ERROR_INODE(inode,
+ "bad extra_isize %u (inode size %u)",
+ ei->i_extra_isize,
+ EXT4_INODE_SIZE(inode->i_sb));
ret = -EFSCORRUPTED;
goto bad_inode;
}
@@ -4534,6 +4647,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_file_acl |=
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
inode->i_size = ext4_isize(raw_inode);
+ if ((size = i_size_read(inode)) < 0) {
+ EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
ei->i_disksize = inode->i_size;
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
@@ -4577,6 +4695,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
if (ei->i_extra_isize == 0) {
/* The extra space is currently unused. Use it. */
+ BUILD_BUG_ON(sizeof(struct ext4_inode) & 3);
ei->i_extra_isize = sizeof(struct ext4_inode) -
EXT4_GOOD_OLD_INODE_SIZE;
} else {
@@ -5154,7 +5273,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
* update c/mtime in shrink case below
*/
if (!shrink) {
- inode->i_mtime = ext4_current_time(inode);
+ inode->i_mtime = current_time(inode);
inode->i_ctime = inode->i_mtime;
}
down_write(&EXT4_I(inode)->i_data_sem);
@@ -5199,12 +5318,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
* in data=journal mode to make pages freeable.
*/
truncate_pagecache(inode, inode->i_size);
- if (shrink)
- ext4_truncate(inode);
+ if (shrink) {
+ rc = ext4_truncate(inode);
+ if (rc)
+ error = rc;
+ }
up_write(&EXT4_I(inode)->i_mmap_sem);
}
- if (!rc) {
+ if (!error) {
setattr_copy(inode, attr);
mark_inode_dirty(inode);
}
@@ -5216,7 +5338,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (orphan && inode->i_nlink)
ext4_orphan_del(NULL, inode);
- if (!rc && (ia_valid & ATTR_MODE))
+ if (!error && (ia_valid & ATTR_MODE))
rc = posix_acl_chmod(inode, inode->i_mode);
err_out:
@@ -5455,18 +5577,20 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err)
return err;
- if (ext4_handle_valid(handle) &&
- EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
+ if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
!ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
/*
- * We need extra buffer credits since we may write into EA block
+ * In nojournal mode, we can immediately attempt to expand
+ * the inode. When journaled, we first need to obtain extra
+ * buffer credits since we may write into the EA block
* with this same handle. If journal_extend fails, then it will
* only result in a minor loss of functionality for that inode.
* If this is felt to be critical, then e2fsck should be run to
* force a large enough s_min_extra_isize.
*/
- if ((jbd2_journal_extend(handle,
- EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
+ if (!ext4_handle_valid(handle) ||
+ jbd2_journal_extend(handle,
+ EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) {
ret = ext4_expand_extra_isize(inode,
sbi->s_want_extra_isize,
iloc, handle);
@@ -5620,6 +5744,11 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
}
ext4_set_aops(inode);
+ /*
+ * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated.
+ * E.g. S_DAX may get cleared / set.
+ */
+ ext4_set_inode_flags(inode);
jbd2_journal_unlock_updates(journal);
percpu_up_write(&sbi->s_journal_flag_rwsem);