diff options
Diffstat (limited to 'fs/ext4/file.c')
-rw-r--r-- | fs/ext4/file.c | 235 |
1 files changed, 119 insertions, 116 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 113837e7ba98..fa2208bae2e1 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -93,31 +93,29 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(iocb->ki_filp); - struct mutex *aio_mutex = NULL; struct blk_plug plug; int o_direct = iocb->ki_flags & IOCB_DIRECT; + int unaligned_aio = 0; int overwrite = 0; ssize_t ret; + inode_lock(inode); + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto out; + /* - * Unaligned direct AIO must be serialized; see comment above - * In the case of O_APPEND, assume that we must always serialize + * Unaligned direct AIO must be serialized among each other as zeroing + * of partial blocks of two competing unaligned AIOs can result in data + * corruption. */ - if (o_direct && - ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && + if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && !is_sync_kiocb(iocb) && - (iocb->ki_flags & IOCB_APPEND || - ext4_unaligned_aio(inode, from, iocb->ki_pos))) { - aio_mutex = ext4_aio_mutex(inode); - mutex_lock(aio_mutex); + ext4_unaligned_aio(inode, from, iocb->ki_pos)) { + unaligned_aio = 1; ext4_unwritten_wait(inode); } - mutex_lock(&inode->i_mutex); - ret = generic_write_checks(iocb, from); - if (ret <= 0) - goto out; - /* * If we have encountered a bitmap-format file, the size limit * is smaller than s_maxbytes, which is for extent-mapped files. @@ -139,7 +137,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) blk_start_plug(&plug); /* check whether we do a DIO overwrite or not */ - if (ext4_should_dioread_nolock(inode) && !aio_mutex && + if (ext4_should_dioread_nolock(inode) && !unaligned_aio && !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { struct ext4_map_blocks map; unsigned int blkbits = inode->i_blkbits; @@ -169,7 +167,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } ret = __generic_file_write_iter(iocb, from); - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); if (ret > 0) { ssize_t err; @@ -181,55 +179,43 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (o_direct) blk_finish_plug(&plug); - if (aio_mutex) - mutex_unlock(aio_mutex); return ret; out: - mutex_unlock(&inode->i_mutex); - if (aio_mutex) - mutex_unlock(aio_mutex); + inode_unlock(inode); return ret; } #ifdef CONFIG_FS_DAX -static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) -{ - struct inode *inode = bh->b_assoc_map->host; - /* XXX: breaks on 32-bit > 16TB. Is that even supported? */ - loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; - int err; - if (!uptodate) - return; - WARN_ON(!buffer_unwritten(bh)); - err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); -} - static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { int result; handle_t *handle = NULL; - struct super_block *sb = file_inode(vma->vm_file)->i_sb; + struct inode *inode = file_inode(vma->vm_file); + struct super_block *sb = inode->i_sb; bool write = vmf->flags & FAULT_FLAG_WRITE; if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, EXT4_DATA_TRANS_BLOCKS(sb)); - } + } else + down_read(&EXT4_I(inode)->i_mmap_sem); if (IS_ERR(handle)) result = VM_FAULT_SIGBUS; else - result = __dax_fault(vma, vmf, ext4_get_block_dax, - ext4_end_io_unwritten); + result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL); if (write) { if (!IS_ERR(handle)) ext4_journal_stop(handle); + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(sb); - } + } else + up_read(&EXT4_I(inode)->i_mmap_sem); return result; } @@ -246,44 +232,73 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, ext4_chunk_trans_blocks(inode, PMD_SIZE / PAGE_SIZE)); - } + } else + down_read(&EXT4_I(inode)->i_mmap_sem); if (IS_ERR(handle)) result = VM_FAULT_SIGBUS; else result = __dax_pmd_fault(vma, addr, pmd, flags, - ext4_get_block_dax, ext4_end_io_unwritten); + ext4_dax_mmap_get_block, NULL); if (write) { if (!IS_ERR(handle)) ext4_journal_stop(handle); + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(sb); - } + } else + up_read(&EXT4_I(inode)->i_mmap_sem); return result; } -static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +/* + * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault() + * handler we check for races agaist truncate. Note that since we cycle through + * i_mmap_sem, we are sure that also any hole punching that began before we + * were called is finished by now and so if it included part of the file we + * are working on, our pte will get unmapped and the check for pte_same() in + * wp_pfn_shared() fails. Thus fault gets retried and things work out as + * desired. + */ +static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) { - return dax_mkwrite(vma, vmf, ext4_get_block_dax, - ext4_end_io_unwritten); + struct inode *inode = file_inode(vma->vm_file); + struct super_block *sb = inode->i_sb; + loff_t size; + int ret; + + sb_start_pagefault(sb); + file_update_time(vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (vmf->pgoff >= size) + ret = VM_FAULT_SIGBUS; + else + ret = dax_pfn_mkwrite(vma, vmf); + up_read(&EXT4_I(inode)->i_mmap_sem); + sb_end_pagefault(sb); + + return ret; } static const struct vm_operations_struct ext4_dax_vm_ops = { .fault = ext4_dax_fault, .pmd_fault = ext4_dax_pmd_fault, - .page_mkwrite = ext4_dax_mkwrite, - .pfn_mkwrite = dax_pfn_mkwrite, + .page_mkwrite = ext4_dax_fault, + .pfn_mkwrite = ext4_dax_pfn_mkwrite, }; #else #define ext4_dax_vm_ops ext4_file_vm_ops #endif static const struct vm_operations_struct ext4_file_vm_ops = { - .fault = filemap_fault, + .fault = ext4_filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = ext4_page_mkwrite, }; @@ -314,6 +329,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) struct super_block *sb = inode->i_sb; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct vfsmount *mnt = filp->f_path.mnt; + struct dentry *dir; struct path path; char buf[64], *cp; int ret; @@ -357,6 +373,18 @@ static int ext4_file_open(struct inode * inode, struct file * filp) if (ext4_encryption_info(inode) == NULL) return -ENOKEY; } + + dir = dget_parent(file_dentry(filp)); + if (ext4_encrypted_inode(d_inode(dir)) && + !ext4_is_child_context_consistent_with_parent(d_inode(dir), inode)) { + ext4_warning(inode->i_sb, + "Inconsistent encryption contexts: %lu/%lu\n", + (unsigned long) d_inode(dir)->i_ino, + (unsigned long) inode->i_ino); + dput(dir); + return -EPERM; + } + dput(dir); /* * Set up the jbd2_inode if we are opening the inode for * writing and the journal is present @@ -387,7 +415,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) */ static int ext4_find_unwritten_pgoff(struct inode *inode, int whence, - struct ext4_map_blocks *map, + ext4_lblk_t end_blk, loff_t *offset) { struct pagevec pvec; @@ -402,10 +430,10 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, blkbits = inode->i_sb->s_blocksize_bits; startoff = *offset; lastoff = startoff; - endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; + endoff = (loff_t)end_blk << blkbits; - index = startoff >> PAGE_CACHE_SHIFT; - end = endoff >> PAGE_CACHE_SHIFT; + index = startoff >> PAGE_SHIFT; + end = endoff >> PAGE_SHIFT; pagevec_init(&pvec, 0); do { @@ -520,18 +548,17 @@ out: static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) { struct inode *inode = file->f_mapping->host; - struct ext4_map_blocks map; struct extent_status es; ext4_lblk_t start, last, end; loff_t dataoff, isize; int blkbits; - int ret = 0; + int ret; - mutex_lock(&inode->i_mutex); + inode_lock(inode); isize = i_size_read(inode); if (offset >= isize) { - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); return -ENXIO; } @@ -542,44 +569,35 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) dataoff = offset; do { - map.m_lblk = last; - map.m_len = end - last + 1; - ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { - if (last != start) - dataoff = (loff_t)last << blkbits; - break; + ret = ext4_get_next_extent(inode, last, end - last + 1, &es); + if (ret <= 0) { + /* No extent found -> no data */ + if (ret == 0) + ret = -ENXIO; + inode_unlock(inode); + return ret; } - /* - * If there is a delay extent at this offset, - * it will be as a data. - */ - ext4_es_find_delayed_extent_range(inode, last, last, &es); - if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { - if (last != start) - dataoff = (loff_t)last << blkbits; + last = es.es_lblk; + if (last != start) + dataoff = (loff_t)last << blkbits; + if (!ext4_es_is_unwritten(&es)) break; - } /* * If there is a unwritten extent at this offset, * it will be as a data or a hole according to page * cache that has data or not. */ - if (map.m_flags & EXT4_MAP_UNWRITTEN) { - int unwritten; - unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, - &map, &dataoff); - if (unwritten) - break; - } - - last++; + if (ext4_find_unwritten_pgoff(inode, SEEK_DATA, + es.es_lblk + es.es_len, &dataoff)) + break; + last += es.es_len; dataoff = (loff_t)last << blkbits; + cond_resched(); } while (last <= end); - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); if (dataoff > isize) return -ENXIO; @@ -593,18 +611,17 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) { struct inode *inode = file->f_mapping->host; - struct ext4_map_blocks map; struct extent_status es; ext4_lblk_t start, last, end; loff_t holeoff, isize; int blkbits; - int ret = 0; + int ret; - mutex_lock(&inode->i_mutex); + inode_lock(inode); isize = i_size_read(inode); if (offset >= isize) { - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); return -ENXIO; } @@ -615,47 +632,33 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) holeoff = offset; do { - map.m_lblk = last; - map.m_len = end - last + 1; - ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { - last += ret; - holeoff = (loff_t)last << blkbits; - continue; + ret = ext4_get_next_extent(inode, last, end - last + 1, &es); + if (ret < 0) { + inode_unlock(inode); + return ret; } - - /* - * If there is a delay extent at this offset, - * we will skip this extent. - */ - ext4_es_find_delayed_extent_range(inode, last, last, &es); - if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { - last = es.es_lblk + es.es_len; - holeoff = (loff_t)last << blkbits; - continue; + /* Found a hole? */ + if (ret == 0 || es.es_lblk > last) { + if (last != start) + holeoff = (loff_t)last << blkbits; + break; } - /* * If there is a unwritten extent at this offset, * it will be as a data or a hole according to page * cache that has data or not. */ - if (map.m_flags & EXT4_MAP_UNWRITTEN) { - int unwritten; - unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, - &map, &holeoff); - if (!unwritten) { - last += ret; - holeoff = (loff_t)last << blkbits; - continue; - } - } + if (ext4_es_is_unwritten(&es) && + ext4_find_unwritten_pgoff(inode, SEEK_HOLE, + last + es.es_len, &holeoff)) + break; - /* find a hole */ - break; + last += es.es_len; + holeoff = (loff_t)last << blkbits; + cond_resched(); } while (last <= end); - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); if (holeoff > isize) holeoff = isize; |