aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/dir-item.c2
-rw-r--r--fs/btrfs/disk-io.c10
-rw-r--r--fs/btrfs/extent-tree.c7
-rw-r--r--fs/btrfs/extent_io.c126
-rw-r--r--fs/btrfs/hash.c5
-rw-r--r--fs/btrfs/inode.c6
-rw-r--r--fs/ceph/file.c6
-rw-r--r--fs/dax.c23
-rw-r--r--fs/ext2/inode.c4
-rw-r--r--fs/ext4/acl.c4
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/extents.c85
-rw-r--r--fs/ext4/file.c54
-rw-r--r--fs/ext4/inline.c5
-rw-r--r--fs/ext4/inode.c34
-rw-r--r--fs/ext4/mballoc.c23
-rw-r--r--fs/ext4/namei.c13
-rw-r--r--fs/ext4/super.c17
-rw-r--r--fs/ext4/xattr.c8
-rw-r--r--fs/f2fs/f2fs.h5
-rw-r--r--fs/fuse/inode.c9
-rw-r--r--fs/gfs2/log.c2
-rw-r--r--fs/jbd2/transaction.c6
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c1
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs42proc.c2
-rw-r--r--fs/nfs/nfs4client.c1
-rw-r--r--fs/nfs/pnfs.c25
-rw-r--r--fs/nfs/pnfs.h10
-rw-r--r--fs/nfs/super.c5
-rw-r--r--fs/nfsd/nfs3xdr.c23
-rw-r--r--fs/nfsd/nfs4proc.c13
-rw-r--r--fs/nfsd/nfsxdr.c13
-rw-r--r--fs/ntfs/namei.c2
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/overlayfs/Kconfig1
-rw-r--r--fs/overlayfs/copy_up.c24
-rw-r--r--fs/overlayfs/dir.c61
-rw-r--r--fs/overlayfs/inode.c12
-rw-r--r--fs/overlayfs/namei.c16
-rw-r--r--fs/overlayfs/overlayfs.h16
-rw-r--r--fs/overlayfs/ovl_entry.h2
-rw-r--r--fs/overlayfs/super.c18
-rw-r--r--fs/overlayfs/util.c72
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/quota/dquot.c16
-rw-r--r--fs/reiserfs/journal.c4
-rw-r--r--fs/stat.c1
-rw-r--r--fs/ufs/balloc.c26
-rw-r--r--fs/ufs/inode.c28
-rw-r--r--fs/ufs/super.c23
-rw-r--r--fs/ufs/util.h10
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c9
-rw-r--r--fs/xfs/libxfs/xfs_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c43
-rw-r--r--fs/xfs/xfs_bmap_util.c10
-rw-r--r--fs/xfs/xfs_buf.c38
-rw-r--r--fs/xfs/xfs_buf.h5
-rw-r--r--fs/xfs/xfs_file.c71
-rw-r--r--fs/xfs/xfs_fsmap.c5
-rw-r--r--fs/xfs/xfs_iomap.c4
63 files changed, 723 insertions, 357 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 643c70d2b2e6..4f8f75d9e839 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2563,7 +2563,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes);
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info,
unsigned num_items)
{
- return fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
+ return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}
/*
@@ -2573,7 +2573,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info,
static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info,
unsigned num_items)
{
- return fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
+ return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
}
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 60a750678a82..c24d615e3d7f 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -468,7 +468,7 @@ int verify_dir_item(struct btrfs_fs_info *fs_info,
if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
btrfs_crit(fs_info, "invalid dir item name len: %u",
- (unsigned)btrfs_dir_data_len(leaf, dir_item));
+ (unsigned)btrfs_dir_name_len(leaf, dir_item));
return 1;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8685d67185d0..5f678dcb20e6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3467,10 +3467,12 @@ static int write_dev_supers(struct btrfs_device *device,
* we fua the first super. The others we allow
* to go down lazy.
*/
- if (i == 0)
- ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh);
- else
+ if (i == 0) {
+ ret = btrfsic_submit_bh(REQ_OP_WRITE,
+ REQ_SYNC | REQ_FUA, bh);
+ } else {
ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+ }
if (ret)
errors++;
}
@@ -3535,7 +3537,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
bio->bi_end_io = btrfs_end_empty_barrier;
bio->bi_bdev = device->bdev;
- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
init_completion(&device->flush_wait);
bio->bi_private = &device->flush_wait;
device->flush_bio = bio;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e390451c72e6..33d979e9ea2a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3993,6 +3993,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
info->space_info_kobj, "%s",
alloc_name(found->flags));
if (ret) {
+ percpu_counter_destroy(&found->total_bytes_pinned);
kfree(found);
return ret;
}
@@ -4844,7 +4845,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
spin_unlock(&delayed_rsv->lock);
commit:
- trans = btrfs_join_transaction(fs_info->fs_root);
+ trans = btrfs_join_transaction(fs_info->extent_root);
if (IS_ERR(trans))
return -ENOSPC;
@@ -4862,7 +4863,7 @@ static int flush_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 num_bytes,
u64 orig_bytes, int state)
{
- struct btrfs_root *root = fs_info->fs_root;
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_trans_handle *trans;
int nr;
int ret = 0;
@@ -5062,7 +5063,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
int flush_state = FLUSH_DELAYED_ITEMS_NR;
spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->extent_root,
space_info);
if (!to_reclaim) {
spin_unlock(&space_info->lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d8da3edf2ac3..d3619e010005 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2458,7 +2458,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
if (!uptodate) {
ClearPageUptodate(page);
SetPageError(page);
- ret = ret < 0 ? ret : -EIO;
+ ret = err < 0 ? err : -EIO;
mapping_set_error(page->mapping, ret);
}
}
@@ -4377,6 +4377,123 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
return NULL;
}
+/*
+ * To cache previous fiemap extent
+ *
+ * Will be used for merging fiemap extent
+ */
+struct fiemap_cache {
+ u64 offset;
+ u64 phys;
+ u64 len;
+ u32 flags;
+ bool cached;
+};
+
+/*
+ * Helper to submit fiemap extent.
+ *
+ * Will try to merge current fiemap extent specified by @offset, @phys,
+ * @len and @flags with cached one.
+ * And only when we fails to merge, cached one will be submitted as
+ * fiemap extent.
+ *
+ * Return value is the same as fiemap_fill_next_extent().
+ */
+static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache,
+ u64 offset, u64 phys, u64 len, u32 flags)
+{
+ int ret = 0;
+
+ if (!cache->cached)
+ goto assign;
+
+ /*
+ * Sanity check, extent_fiemap() should have ensured that new
+ * fiemap extent won't overlap with cahced one.
+ * Not recoverable.
+ *
+ * NOTE: Physical address can overlap, due to compression
+ */
+ if (cache->offset + cache->len > offset) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ /*
+ * Only merges fiemap extents if
+ * 1) Their logical addresses are continuous
+ *
+ * 2) Their physical addresses are continuous
+ * So truly compressed (physical size smaller than logical size)
+ * extents won't get merged with each other
+ *
+ * 3) Share same flags except FIEMAP_EXTENT_LAST
+ * So regular extent won't get merged with prealloc extent
+ */
+ if (cache->offset + cache->len == offset &&
+ cache->phys + cache->len == phys &&
+ (cache->flags & ~FIEMAP_EXTENT_LAST) ==
+ (flags & ~FIEMAP_EXTENT_LAST)) {
+ cache->len += len;
+ cache->flags |= flags;
+ goto try_submit_last;
+ }
+
+ /* Not mergeable, need to submit cached one */
+ ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+ cache->len, cache->flags);
+ cache->cached = false;
+ if (ret)
+ return ret;
+assign:
+ cache->cached = true;
+ cache->offset = offset;
+ cache->phys = phys;
+ cache->len = len;
+ cache->flags = flags;
+try_submit_last:
+ if (cache->flags & FIEMAP_EXTENT_LAST) {
+ ret = fiemap_fill_next_extent(fieinfo, cache->offset,
+ cache->phys, cache->len, cache->flags);
+ cache->cached = false;
+ }
+ return ret;
+}
+
+/*
+ * Sanity check for fiemap cache
+ *
+ * All fiemap cache should be submitted by emit_fiemap_extent()
+ * Iteration should be terminated either by last fiemap extent or
+ * fieinfo->fi_extents_max.
+ * So no cached fiemap should exist.
+ */
+static int check_fiemap_cache(struct btrfs_fs_info *fs_info,
+ struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache)
+{
+ int ret;
+
+ if (!cache->cached)
+ return 0;
+
+ /* Small and recoverbale problem, only to info developer */
+#ifdef CONFIG_BTRFS_DEBUG
+ WARN_ON(1);
+#endif
+ btrfs_warn(fs_info,
+ "unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x",
+ cache->offset, cache->phys, cache->len, cache->flags);
+ ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+ cache->len, cache->flags);
+ cache->cached = false;
+ if (ret > 0)
+ ret = 0;
+ return ret;
+}
+
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -4394,6 +4511,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct fiemap_cache cache = { 0 };
int end = 0;
u64 em_start = 0;
u64 em_len = 0;
@@ -4573,8 +4691,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
- ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
- em_len, flags);
+ ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
+ em_len, flags);
if (ret) {
if (ret == 1)
ret = 0;
@@ -4582,6 +4700,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
}
out_free:
+ if (!ret)
+ ret = check_fiemap_cache(root->fs_info, fieinfo, &cache);
free_extent_map(em);
out:
btrfs_free_path(path);
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
index a97fdc156a03..baacc1866861 100644
--- a/fs/btrfs/hash.c
+++ b/fs/btrfs/hash.c
@@ -38,6 +38,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
{
SHASH_DESC_ON_STACK(shash, tfm);
u32 *ctx = (u32 *)shash_desc_ctx(shash);
+ u32 retval;
int err;
shash->tfm = tfm;
@@ -47,5 +48,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
err = crypto_shash_update(shash, address, length);
BUG_ON(err);
- return *ctx;
+ retval = *ctx;
+ barrier_data(ctx);
+ return retval;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 17cbe9306faf..ef3c98c527c1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2952,7 +2952,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ret = test_range_bit(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
- EXTENT_DEFRAG, 1, cached_state);
+ EXTENT_DEFRAG, 0, cached_state);
if (ret) {
u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
if (0 && last_snapshot >= BTRFS_I(inode)->generation)
@@ -7483,8 +7483,8 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
int found = false;
void **pagep = NULL;
struct page *page = NULL;
- int start_idx;
- int end_idx;
+ unsigned long start_idx;
+ unsigned long end_idx;
start_idx = start >> PAGE_SHIFT;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 3fdde0b283c9..29308a80d66f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1671,8 +1671,12 @@ static long ceph_fallocate(struct file *file, int mode,
}
size = i_size_read(inode);
- if (!(mode & FALLOC_FL_KEEP_SIZE))
+ if (!(mode & FALLOC_FL_KEEP_SIZE)) {
endoff = offset + length;
+ ret = inode_newsize_ok(inode, endoff);
+ if (ret)
+ goto unlock;
+ }
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
diff --git a/fs/dax.c b/fs/dax.c
index c22eaf162f95..2a6889b3585f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1155,6 +1155,17 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
}
/*
+ * It is possible, particularly with mixed reads & writes to private
+ * mappings, that we have raced with a PMD fault that overlaps with
+ * the PTE we need to set up. If so just return and the fault will be
+ * retried.
+ */
+ if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
+ vmf_ret = VM_FAULT_NOPAGE;
+ goto unlock_entry;
+ }
+
+ /*
* Note that we don't bother to use iomap_apply here: DAX required
* the file system block size to be equal the page size, which means
* that we never have to deal with more than a single extent here.
@@ -1398,6 +1409,18 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
goto fallback;
/*
+ * It is possible, particularly with mixed reads & writes to private
+ * mappings, that we have raced with a PTE fault that overlaps with
+ * the PMD we need to set up. If so just return and the fault will be
+ * retried.
+ */
+ if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
+ !pmd_devmap(*vmf->pmd)) {
+ result = 0;
+ goto unlock_entry;
+ }
+
+ /*
* Note that we don't use iomap_apply here. We aren't doing I/O, only
* setting up a mapping, so really we're using iomap_begin() as a way
* to look up our filesystem block.
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 26d77f9f8c12..2dcbd5698884 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -817,7 +817,7 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->bdev = bdev;
iomap->offset = (u64)first_block << blkbits;
if (blk_queue_dax(bdev->bd_queue))
- iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+ iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
else
iomap->dax_dev = NULL;
@@ -841,7 +841,7 @@ static int
ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
ssize_t written, unsigned flags, struct iomap *iomap)
{
- put_dax(iomap->dax_dev);
+ fs_put_dax(iomap->dax_dev);
if (iomap->type == IOMAP_MAPPED &&
written < length &&
(flags & IOMAP_WRITE))
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index fd389935ecd1..3ec0e46de95f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -4,6 +4,7 @@
* Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
*/
+#include <linux/quotaops.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
@@ -232,6 +233,9 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
handle_t *handle;
int error, retries = 0;
+ error = dquot_initialize(inode);
+ if (error)
+ return error;
retry:
handle = ext4_journal_start(inode, EXT4_HT_XATTR,
ext4_jbd2_credits_xattr(inode));
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8e8046104f4d..32191548abed 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2523,7 +2523,6 @@ extern int ext4_search_dir(struct buffer_head *bh,
int buf_size,
struct inode *dir,
struct ext4_filename *fname,
- const struct qstr *d_name,
unsigned int offset,
struct ext4_dir_entry_2 **res_dir);
extern int ext4_generic_delete_entry(handle_t *handle,
@@ -3007,7 +3006,6 @@ extern int htree_inlinedir_to_tree(struct file *dir_file,
int *has_inline_data);
extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_filename *fname,
- const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir,
int *has_inline_data);
extern int ext4_delete_inline_entry(handle_t *handle,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2a97dff87b96..3e36508610b7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3413,13 +3413,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_sb_info *sbi;
struct ext4_extent_header *eh;
struct ext4_map_blocks split_map;
- struct ext4_extent zero_ex;
+ struct ext4_extent zero_ex1, zero_ex2;
struct ext4_extent *ex, *abut_ex;
ext4_lblk_t ee_block, eof_block;
unsigned int ee_len, depth, map_len = map->m_len;
int allocated = 0, max_zeroout = 0;
int err = 0;
- int split_flag = 0;
+ int split_flag = EXT4_EXT_DATA_VALID2;
ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
@@ -3436,7 +3436,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
- zero_ex.ee_len = 0;
+ zero_ex1.ee_len = 0;
+ zero_ex2.ee_len = 0;
trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
@@ -3576,62 +3577,52 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (ext4_encrypted_inode(inode))
max_zeroout = 0;
- /* If extent is less than s_max_zeroout_kb, zeroout directly */
- if (max_zeroout && (ee_len <= max_zeroout)) {
- err = ext4_ext_zeroout(inode, ex);
- if (err)
- goto out;
- zero_ex.ee_block = ex->ee_block;
- zero_ex.ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex));
- ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
- ext4_ext_mark_initialized(ex);
- ext4_ext_try_to_merge(handle, inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + path->p_depth);
- goto out;
- }
-
/*
- * four cases:
+ * five cases:
* 1. split the extent into three extents.
- * 2. split the extent into two extents, zeroout the first half.
- * 3. split the extent into two extents, zeroout the second half.
+ * 2. split the extent into two extents, zeroout the head of the first
+ * extent.
+ * 3. split the extent into two extents, zeroout the tail of the second
+ * extent.
* 4. split the extent into two extents with out zeroout.
+ * 5. no splitting needed, just possibly zeroout the head and / or the
+ * tail of the extent.
*/
split_map.m_lblk = map->m_lblk;
split_map.m_len = map->m_len;
- if (max_zeroout && (allocated > map->m_len)) {
+ if (max_zeroout && (allocated > split_map.m_len)) {
if (allocated <= max_zeroout) {
- /* case 3 */
- zero_ex.ee_block =
- cpu_to_le32(map->m_lblk);
- zero_ex.ee_len = cpu_to_le16(allocated);
- ext4_ext_store_pblock(&zero_ex,
- ext4_ext_pblock(ex) + map->m_lblk - ee_block);
- err = ext4_ext_zeroout(inode, &zero_ex);
+ /* case 3 or 5 */
+ zero_ex1.ee_block =
+ cpu_to_le32(split_map.m_lblk +
+ split_map.m_len);
+ zero_ex1.ee_len =
+ cpu_to_le16(allocated - split_map.m_len);
+ ext4_ext_store_pblock(&zero_ex1,
+ ext4_ext_pblock(ex) + split_map.m_lblk +
+ split_map.m_len - ee_block);
+ err = ext4_ext_zeroout(inode, &zero_ex1);
if (err)
goto out;
- split_map.m_lblk = map->m_lblk;
split_map.m_len = allocated;
- } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
- /* case 2 */
- if (map->m_lblk != ee_block) {
- zero_ex.ee_block = ex->ee_block;
- zero_ex.ee_len = cpu_to_le16(map->m_lblk -
+ }
+ if (split_map.m_lblk - ee_block + split_map.m_len <
+ max_zeroout) {
+ /* case 2 or 5 */
+ if (split_map.m_lblk != ee_block) {
+ zero_ex2.ee_block = ex->ee_block;
+ zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
ee_block);
- ext4_ext_store_pblock(&zero_ex,
+ ext4_ext_store_pblock(&zero_ex2,
ext4_ext_pblock(ex));
- err = ext4_ext_zeroout(inode, &zero_ex);
+ err = ext4_ext_zeroout(inode, &zero_ex2);
if (err)
goto out;
}
+ split_map.m_len += split_map.m_lblk - ee_block;
split_map.m_lblk = ee_block;
- split_map.m_len = map->m_lblk - ee_block + map->m_len;
allocated = map->m_len;
}
}
@@ -3642,8 +3633,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
err = 0;
out:
/* If we have gotten a failure, don't zero out status tree */
- if (!err)
- err = ext4_zeroout_es(inode, &zero_ex);
+ if (!err) {
+ err = ext4_zeroout_es(inode, &zero_ex1);
+ if (!err)
+ err = ext4_zeroout_es(inode, &zero_ex2);
+ }
return err ? err : allocated;
}
@@ -4883,6 +4877,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
/* Zero out partial block at the edges of the range */
ret = ext4_zero_partial_blocks(handle, inode, offset, len);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
if (file->f_flags & O_SYNC)
ext4_handle_sync(handle);
@@ -5569,6 +5565,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ext4_handle_sync(handle);
inode->i_mtime = inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
+ ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
ext4_journal_stop(handle);
@@ -5742,6 +5739,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
ext4_journal_stop(handle);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 831fd6beebf0..02ce7e7bbdf5 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -474,57 +474,37 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
endoff = (loff_t)end_blk << blkbits;
index = startoff >> PAGE_SHIFT;
- end = endoff >> PAGE_SHIFT;
+ end = (endoff - 1) >> PAGE_SHIFT;
pagevec_init(&pvec, 0);
do {
int i, num;
unsigned long nr_pages;
- num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+ num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
(pgoff_t)num);
- if (nr_pages == 0) {
- if (whence == SEEK_DATA)
- break;
-
- BUG_ON(whence != SEEK_HOLE);
- /*
- * If this is the first time to go into the loop and
- * offset is not beyond the end offset, it will be a
- * hole at this offset
- */
- if (lastoff == startoff || lastoff < endoff)
- found = 1;
- break;
- }
-
- /*
- * If this is the first time to go into the loop and
- * offset is smaller than the first page offset, it will be a
- * hole at this offset.
- */
- if (lastoff == startoff && whence == SEEK_HOLE &&
- lastoff < page_offset(pvec.pages[0])) {
- found = 1;
+ if (nr_pages == 0)
break;
- }
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
struct buffer_head *bh, *head;
/*
- * If the current offset is not beyond the end of given
- * range, it will be a hole.
+ * If current offset is smaller than the page offset,
+ * there is a hole at this offset.
*/
- if (lastoff < endoff && whence == SEEK_HOLE &&
- page->index > end) {
+ if (whence == SEEK_HOLE && lastoff < endoff &&
+ lastoff < page_offset(pvec.pages[i])) {
found = 1;
*offset = lastoff;
goto out;
}
+ if (page->index > end)
+ goto out;
+
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping)) {
@@ -564,20 +544,18 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
unlock_page(page);
}
- /*
- * The no. of pages is less than our desired, that would be a
- * hole in there.
- */
- if (nr_pages < num && whence == SEEK_HOLE) {
- found = 1;
- *offset = lastoff;
+ /* The no. of pages is less than our desired, we are done. */
+ if (nr_pages < num)
break;
- }
index = pvec.pages[i - 1]->index + 1;
pagevec_release(&pvec);
} while (index <= end);
+ if (whence == SEEK_HOLE && lastoff < endoff) {
+ found = 1;
+ *offset = lastoff;
+ }
out:
pagevec_release(&pvec);
return found;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d5dea4c293ef..8d141c0c8ff9 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1627,7 +1627,6 @@ out:
struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_filename *fname,
- const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir,
int *has_inline_data)
{
@@ -1649,7 +1648,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
- dir, fname, d_name, 0, res_dir);
+ dir, fname, 0, res_dir);
if (ret == 1)
goto out_find;
if (ret < 0)
@@ -1662,7 +1661,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
- dir, fname, d_name, 0, res_dir);
+ dir, fname, 0, res_dir);
if (ret == 1)
goto out_find;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5834c4d76be8..5cf82d03968c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2124,15 +2124,29 @@ static int ext4_writepage(struct page *page,
static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
{
int len;
- loff_t size = i_size_read(mpd->inode);
+ loff_t size;
int err;
BUG_ON(page->index != mpd->first_page);
+ clear_page_dirty_for_io(page);
+ /*
+ * We have to be very careful here! Nothing protects writeback path
+ * against i_size changes and the page can be writeably mapped into
+ * page tables. So an application can be growing i_size and writing
+ * data through mmap while writeback runs. clear_page_dirty_for_io()
+ * write-protects our page in page tables and the page cannot get
+ * written to again until we release page lock. So only after
+ * clear_page_dirty_for_io() we are safe to sample i_size for
+ * ext4_bio_write_page() to zero-out tail of the written page. We rely
+ * on the barrier provided by TestClearPageDirty in
+ * clear_page_dirty_for_io() to make sure i_size is really sampled only
+ * after page tables are updated.
+ */
+ size = i_size_read(mpd->inode);
if (page->index == size >> PAGE_SHIFT)
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
- clear_page_dirty_for_io(page);
err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
if (!err)
mpd->wbc->nr_to_write--;
@@ -3412,7 +3426,7 @@ retry:
bdev = inode->i_sb->s_bdev;
iomap->bdev = bdev;
if (blk_queue_dax(bdev->bd_queue))
- iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+ iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
else
iomap->dax_dev = NULL;
iomap->offset = first_block << blkbits;
@@ -3447,7 +3461,7 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
int blkbits = inode->i_blkbits;
bool truncate = false;
- put_dax(iomap->dax_dev);
+ fs_put_dax(iomap->dax_dev);
if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
return 0;
@@ -3629,9 +3643,6 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
get_block_func = ext4_dio_get_block_unwritten_async;
dio_flags = DIO_LOCKING;
}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
-#endif
ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
get_block_func, ext4_end_io_dio, NULL,
dio_flags);
@@ -3713,7 +3724,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
*/
inode_lock_shared(inode);
ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
- iocb->ki_pos + count);
+ iocb->ki_pos + count - 1);
if (ret)
goto out_unlock;
ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
@@ -4207,6 +4218,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
inode->i_mtime = inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
ext4_journal_stop(handle);
out_dio:
@@ -5637,8 +5650,9 @@ static int ext4_expand_extra_isize(struct inode *inode,
/* No extended attributes present */
if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
- memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
- new_extra_isize);
+ memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+ EXT4_I(inode)->i_extra_isize, 0,
+ new_extra_isize - EXT4_I(inode)->i_extra_isize);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
return 0;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5083bce20ac4..b7928cddd539 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3887,7 +3887,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
err = ext4_mb_load_buddy(sb, group, &e4b);
if (err) {
- ext4_error(sb, "Error loading buddy information for %u", group);
+ ext4_warning(sb, "Error %d loading buddy information for %u",
+ err, group);
put_bh(bitmap_bh);
return 0;
}
@@ -4044,10 +4045,11 @@ repeat:
BUG_ON(pa->pa_type != MB_INODE_PA);
group = ext4_get_group_number(sb, pa->pa_pstart);
- err = ext4_mb_load_buddy(sb, group, &e4b);
+ err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
+ GFP_NOFS|__GFP_NOFAIL);
if (err) {
- ext4_error(sb, "Error loading buddy information for %u",
- group);
+ ext4_error(sb, "Error %d loading buddy information for %u",
+ err, group);
continue;
}
@@ -4303,11 +4305,14 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
spin_unlock(&lg->lg_prealloc_lock);
list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
+ int err;
group = ext4_get_group_number(sb, pa->pa_pstart);
- if (ext4_mb_load_buddy(sb, group, &e4b)) {
- ext4_error(sb, "Error loading buddy information for %u",
- group);
+ err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
+ GFP_NOFS|__GFP_NOFAIL);
+ if (err) {
+ ext4_error(sb, "Error %d loading buddy information for %u",
+ err, group);
continue;
}
ext4_lock_group(sb, group);
@@ -5127,8 +5132,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ret = ext4_mb_load_buddy(sb, group, &e4b);
if (ret) {
- ext4_error(sb, "Error in loading buddy "
- "information for %u", group);
+ ext4_warning(sb, "Error %d loading buddy information for %u",
+ ret, group);
return ret;
}
bitmap = e4b.bd_bitmap;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b81f7d46f344..404256caf9cf 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1155,12 +1155,11 @@ errout:
static inline int search_dirblock(struct buffer_head *bh,
struct inode *dir,
struct ext4_filename *fname,
- const struct qstr *d_name,
unsigned int offset,
struct ext4_dir_entry_2 **res_dir)
{
return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
- fname, d_name, offset, res_dir);
+ fname, offset, res_dir);
}
/*
@@ -1262,7 +1261,6 @@ static inline bool ext4_match(const struct ext4_filename *fname,
*/
int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
struct inode *dir, struct ext4_filename *fname,
- const struct qstr *d_name,
unsigned int offset, struct ext4_dir_entry_2 **res_dir)
{
struct ext4_dir_entry_2 * de;
@@ -1355,7 +1353,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
if (ext4_has_inline_data(dir)) {
int has_inline_data = 1;
- ret = ext4_find_inline_entry(dir, &fname, d_name, res_dir,
+ ret = ext4_find_inline_entry(dir, &fname, res_dir,
&has_inline_data);
if (has_inline_data) {
if (inlined)
@@ -1447,7 +1445,7 @@ restart:
goto next;
}
set_buffer_verified(bh);
- i = search_dirblock(bh, dir, &fname, d_name,
+ i = search_dirblock(bh, dir, &fname,
block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
if (i == 1) {
EXT4_I(dir)->i_dir_start_lookup = block;
@@ -1488,7 +1486,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
{
struct super_block * sb = dir->i_sb;
struct dx_frame frames[2], *frame;
- const struct qstr *d_name = fname->usr_fname;
struct buffer_head *bh;
ext4_lblk_t block;
int retval;
@@ -1505,7 +1502,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
if (IS_ERR(bh))
goto errout;
- retval = search_dirblock(bh, dir, fname, d_name,
+ retval = search_dirblock(bh, dir, fname,
block << EXT4_BLOCK_SIZE_BITS(sb),
res_dir);
if (retval == 1)
@@ -1530,7 +1527,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
bh = NULL;
errout:
- dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
+ dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name));
success:
dx_release(frames);
return bh;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0b177da9ea82..d37c81f327e7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -848,14 +848,9 @@ static inline void ext4_quota_off_umount(struct super_block *sb)
{
int type;
- if (ext4_has_feature_quota(sb)) {
- dquot_disable(sb, -1,
- DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
- } else {
- /* Use our quota_off function to clear inode flags etc. */
- for (type = 0; type < EXT4_MAXQUOTAS; type++)
- ext4_quota_off(sb, type);
- }
+ /* Use our quota_off function to clear inode flags etc. */
+ for (type = 0; type < EXT4_MAXQUOTAS; type++)
+ ext4_quota_off(sb, type);
}
#else
static inline void ext4_quota_off_umount(struct super_block *sb)
@@ -1179,6 +1174,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
return res;
}
+ res = dquot_initialize(inode);
+ if (res)
+ return res;
retry:
handle = ext4_journal_start(inode, EXT4_HT_MISC,
ext4_jbd2_credits_xattr(inode));
@@ -5485,7 +5483,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
goto out;
err = dquot_quota_off(sb, type);
- if (err)
+ if (err || ext4_has_feature_quota(sb))
goto out_put;
inode_lock(inode);
@@ -5505,6 +5503,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
out_unlock:
inode_unlock(inode);
out_put:
+ lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
iput(inode);
return err;
out:
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 8fb7ce14e6eb..5d3c2536641c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -888,6 +888,8 @@ inserted:
else {
u32 ref;
+ WARN_ON_ONCE(dquot_initialize_needed(inode));
+
/* The old block is released after updating
the inode. */
error = dquot_alloc_block(inode,
@@ -954,6 +956,8 @@ inserted:
/* We need to allocate a new block */
ext4_fsblk_t goal, block;
+ WARN_ON_ONCE(dquot_initialize_needed(inode));
+
goal = ext4_group_first_block_no(sb,
EXT4_I(inode)->i_block_group);
@@ -1166,6 +1170,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
return -EINVAL;
if (strlen(name) > 255)
return -ERANGE;
+
ext4_write_lock_xattr(inode, &no_expand);
error = ext4_reserve_inode_write(handle, inode, &is.iloc);
@@ -1267,6 +1272,9 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
int error, retries = 0;
int credits = ext4_jbd2_credits_xattr(inode);
+ error = dquot_initialize(inode);
+ if (error)
+ return error;
retry:
handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
if (IS_ERR(handle)) {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2185c7a040a1..fd2e651bad6d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1078,6 +1078,7 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
{
SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver);
u32 *ctx = (u32 *)shash_desc_ctx(shash);
+ u32 retval;
int err;
shash->tfm = sbi->s_chksum_driver;
@@ -1087,7 +1088,9 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
err = crypto_shash_update(shash, address, length);
BUG_ON(err);
- return *ctx;
+ retval = *ctx;
+ barrier_data(ctx);
+ return retval;
}
static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 5a1b58f8fef4..65c88379a3a1 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -975,8 +975,15 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
int err;
char *suffix = "";
- if (sb->s_bdev)
+ if (sb->s_bdev) {
suffix = "-fuseblk";
+ /*
+ * sb->s_bdi points to blkdev's bdi however we want to redirect
+ * it to our private bdi...
+ */
+ bdi_put(sb->s_bdi);
+ sb->s_bdi = &noop_backing_dev_info;
+ }
err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
MINOR(fc->dev), suffix);
if (err)
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f865b96374df..d2955daf17a4 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -659,7 +659,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
struct gfs2_log_header *lh;
unsigned int tail;
u32 hash;
- int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META;
+ int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
lh = page_address(page);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 9ee4832b6f8b..2d30a6da7013 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -680,6 +680,12 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
handle->h_buffer_credits = nblocks;
+ /*
+ * Restore the original nofs context because the journal restart
+ * is basically the same thing as journal stop and start.
+ * start_this_handle will start a new nofs context.
+ */
+ memalloc_nofs_restore(handle->saved_alloc_context);
ret = start_this_handle(journal, handle, gfp_mask);
return ret;
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index f5714ee01000..23542dc44a25 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -454,6 +454,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
goto out_err_free;
/* fh */
+ rc = -EIO;
p = xdr_inline_decode(&stream, 4);
if (!p)
goto out_err_free;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e9b4c3320e37..3e24392f2caa 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -398,7 +398,6 @@ extern struct file_system_type nfs4_referral_fs_type;
bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
struct nfs_subversion *);
-void nfs_initialise_sb(struct super_block *);
int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *,
@@ -458,7 +457,6 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
/* super.c */
-void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
void nfs_umount_begin(struct super_block *);
int nfs_statfs(struct dentry *, struct kstatfs *);
int nfs_show_options(struct seq_file *, struct dentry *);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 1a224a33a6c2..e5686be67be8 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -246,7 +246,7 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
devname = nfs_devname(dentry, page, PAGE_SIZE);
if (IS_ERR(devname))
- mnt = (struct vfsmount *)devname;
+ mnt = ERR_CAST(devname);
else
mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 929d09a5310a..319a47db218d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -177,7 +177,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
if (status)
goto out;
- if (!nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
+ if (nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
&res->commit_res.verf->verifier)) {
status = -EAGAIN;
goto out;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 692a7a8bfc7a..66776f022111 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -582,7 +582,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
*/
nfs4_schedule_path_down_recovery(pos);
default:
- spin_lock(&nn->nfs_client_lock);
goto out;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index adc6ec28d4b5..c383d0913b54 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2094,12 +2094,26 @@ pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
+/*
+ * Check for any intersection between the request and the pgio->pg_lseg,
+ * and if none, put this pgio->pg_lseg away.
+ */
+static void
+pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
+ pnfs_put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ }
+}
+
void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
u64 rd_size = req->wb_bytes;
pnfs_generic_pg_check_layout(pgio);
+ pnfs_generic_pg_check_range(pgio, req);
if (pgio->pg_lseg == NULL) {
if (pgio->pg_dreq == NULL)
rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
@@ -2131,6 +2145,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size)
{
pnfs_generic_pg_check_layout(pgio);
+ pnfs_generic_pg_check_range(pgio, req);
if (pgio->pg_lseg == NULL) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
@@ -2191,16 +2206,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset,
pgio->pg_lseg->pls_range.length);
req_start = req_offset(req);
- WARN_ON_ONCE(req_start >= seg_end);
+
/* start of request is past the last byte of this segment */
- if (req_start >= seg_end) {
- /* reference the new lseg */
- if (pgio->pg_ops->pg_cleanup)
- pgio->pg_ops->pg_cleanup(pgio);
- if (pgio->pg_ops->pg_init)
- pgio->pg_ops->pg_init(pgio, req);
+ if (req_start >= seg_end)
return 0;
- }
/* adjust 'size' iff there are fewer bytes left in the
* segment than what nfs_generic_pg_test returned */
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2d05b756a8d6..99731e3e332f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -593,6 +593,16 @@ pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1,
return pnfs_is_range_intersecting(l1->offset, end1, l2->offset, end2);
}
+static inline bool
+pnfs_lseg_request_intersecting(struct pnfs_layout_segment *lseg, struct nfs_page *req)
+{
+ u64 seg_last = pnfs_end_offset(lseg->pls_range.offset, lseg->pls_range.length);
+ u64 req_last = req_offset(req) + req->wb_bytes;
+
+ return pnfs_is_range_intersecting(lseg->pls_range.offset, seg_last,
+ req_offset(req), req_last);
+}
+
extern unsigned int layoutstats_timer;
#ifdef NFS_DEBUG
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2f3822a4a7d5..eceb4eabb064 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2301,7 +2301,7 @@ EXPORT_SYMBOL_GPL(nfs_remount);
/*
* Initialise the common bits of the superblock
*/
-inline void nfs_initialise_sb(struct super_block *sb)
+static void nfs_initialise_sb(struct super_block *sb)
{
struct nfs_server *server = NFS_SB(sb);
@@ -2348,7 +2348,8 @@ EXPORT_SYMBOL_GPL(nfs_fill_super);
/*
* Finish setting up a cloned NFS2/3/4 superblock
*/
-void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
+static void nfs_clone_super(struct super_block *sb,
+ struct nfs_mount_info *mount_info)
{
const struct super_block *old_sb = mount_info->cloned->sb;
struct nfs_server *server = NFS_SB(sb);
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 12feac6ee2fd..452334694a5d 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -334,11 +334,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
if (!p)
return 0;
p = xdr_decode_hyper(p, &args->offset);
- args->count = ntohl(*p++);
-
- if (!xdr_argsize_check(rqstp, p))
- return 0;
+ args->count = ntohl(*p++);
len = min(args->count, max_blocksize);
/* set up the kvec */
@@ -352,7 +349,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
v++;
}
args->vlen = v;
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
int
@@ -544,11 +541,9 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
p = decode_fh(p, &args->fh);
if (!p)
return 0;
- if (!xdr_argsize_check(rqstp, p))
- return 0;
args->buffer = page_address(*(rqstp->rq_next_page++));
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
int
@@ -574,14 +569,10 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
args->verf = p; p += 2;
args->dircount = ~0;
args->count = ntohl(*p++);
-
- if (!xdr_argsize_check(rqstp, p))
- return 0;
-
args->count = min_t(u32, args->count, PAGE_SIZE);
args->buffer = page_address(*(rqstp->rq_next_page++));
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
int
@@ -599,9 +590,6 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
args->dircount = ntohl(*p++);
args->count = ntohl(*p++);
- if (!xdr_argsize_check(rqstp, p))
- return 0;
-
len = args->count = min(args->count, max_blocksize);
while (len > 0) {
struct page *p = *(rqstp->rq_next_page++);
@@ -609,7 +597,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
args->buffer = page_address(p);
len -= PAGE_SIZE;
}
- return 1;
+
+ return xdr_argsize_check(rqstp, p);
}
int
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c453a1998e00..dadb3bf305b2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1769,6 +1769,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
opdesc->op_get_currentstateid(cstate, &op->u);
op->status = opdesc->op_func(rqstp, cstate, &op->u);
+ /* Only from SEQUENCE */
+ if (cstate->status == nfserr_replay_cache) {
+ dprintk("%s NFS4.1 replay from cache\n", __func__);
+ status = op->status;
+ goto out;
+ }
if (!op->status) {
if (opdesc->op_set_currentstateid)
opdesc->op_set_currentstateid(cstate, &op->u);
@@ -1779,14 +1785,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
if (need_wrongsec_check(rqstp))
op->status = check_nfsd_access(current_fh->fh_export, rqstp);
}
-
encode_op:
- /* Only from SEQUENCE */
- if (cstate->status == nfserr_replay_cache) {
- dprintk("%s NFS4.1 replay from cache\n", __func__);
- status = op->status;
- goto out;
- }
if (op->status == nfserr_replay_me) {
op->replay = &cstate->replay_owner->so_replay;
nfsd4_encode_replay(&resp->xdr, op);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 6a4947a3f4fa..de07ff625777 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -257,9 +257,6 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
len = args->count = ntohl(*p++);
p++; /* totalcount - unused */
- if (!xdr_argsize_check(rqstp, p))
- return 0;
-
len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
/* set up somewhere to store response.
@@ -275,7 +272,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
v++;
}
args->vlen = v;
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
int
@@ -365,11 +362,9 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
p = decode_fh(p, &args->fh);
if (!p)
return 0;
- if (!xdr_argsize_check(rqstp, p))
- return 0;
args->buffer = page_address(*(rqstp->rq_next_page++));
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
int
@@ -407,11 +402,9 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
args->cookie = ntohl(*p++);
args->count = ntohl(*p++);
args->count = min_t(u32, args->count, PAGE_SIZE);
- if (!xdr_argsize_check(rqstp, p))
- return 0;
args->buffer = page_address(*(rqstp->rq_next_page++));
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
/*
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 358258364616..4690cd75d8d7 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -159,7 +159,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
PTR_ERR(dent_inode));
kfree(name);
/* Return the error code. */
- return (struct dentry *)dent_inode;
+ return ERR_CAST(dent_inode);
}
/* It is guaranteed that @name is no longer allocated at this point. */
if (MREF_ERR(mref) == -ENOENT) {
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 827fc9809bc2..9f88188060db 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -119,7 +119,7 @@ check_err:
if (IS_ERR(inode)) {
mlog_errno(PTR_ERR(inode));
- result = (void *)inode;
+ result = ERR_CAST(inode);
goto bail;
}
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 0daac5112f7a..c0c9683934b7 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -1,5 +1,6 @@
config OVERLAY_FS
tristate "Overlay filesystem support"
+ select EXPORTFS
help
An overlay filesystem combines two filesystems - an 'upper' filesystem
and a 'lower' filesystem. When a name exists in both filesystems, the
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 9008ab9fbd2e..7a44533f4bbf 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -300,7 +300,11 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
return PTR_ERR(fh);
}
- err = ovl_do_setxattr(upper, OVL_XATTR_ORIGIN, fh, fh ? fh->len : 0, 0);
+ /*
+ * Do not fail when upper doesn't support xattrs.
+ */
+ err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
+ fh ? fh->len : 0, 0);
kfree(fh);
return err;
@@ -342,13 +346,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
if (tmpfile)
temp = ovl_do_tmpfile(upperdir, stat->mode);
else
- temp = ovl_lookup_temp(workdir, dentry);
- err = PTR_ERR(temp);
- if (IS_ERR(temp))
- goto out1;
-
+ temp = ovl_lookup_temp(workdir);
err = 0;
- if (!tmpfile)
+ if (IS_ERR(temp)) {
+ err = PTR_ERR(temp);
+ temp = NULL;
+ }
+
+ if (!err && !tmpfile)
err = ovl_create_real(wdir, temp, &cattr, NULL, true);
if (new_creds) {
@@ -454,6 +459,11 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
ovl_path_upper(parent, &parentpath);
upperdir = parentpath.dentry;
+ /* Mark parent "impure" because it may now contain non-pure upper */
+ err = ovl_set_impure(parent, upperdir);
+ if (err)
+ return err;
+
err = vfs_getattr(&parentpath, &pstat,
STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
if (err)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 723b98b90698..a63a71656e9b 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -41,7 +41,7 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
}
}
-struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
+struct dentry *ovl_lookup_temp(struct dentry *workdir)
{
struct dentry *temp;
char name[20];
@@ -68,7 +68,7 @@ static struct dentry *ovl_whiteout(struct dentry *workdir,
struct dentry *whiteout;
struct inode *wdir = workdir->d_inode;
- whiteout = ovl_lookup_temp(workdir, dentry);
+ whiteout = ovl_lookup_temp(workdir);
if (IS_ERR(whiteout))
return whiteout;
@@ -127,17 +127,28 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry,
return err;
}
-static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
+static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
+ int xerr)
{
int err;
- err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
+ err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
if (!err)
ovl_dentry_set_opaque(dentry);
return err;
}
+static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
+{
+ /*
+ * Fail with -EIO when trying to create opaque dir and upper doesn't
+ * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
+ * return a specific error for noxattr case.
+ */
+ return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
+}
+
/* Common operations required to be done after creation of file on upper */
static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry, bool hardlink)
@@ -162,6 +173,11 @@ static bool ovl_type_merge(struct dentry *dentry)
return OVL_TYPE_MERGE(ovl_path_type(dentry));
}
+static bool ovl_type_origin(struct dentry *dentry)
+{
+ return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
+}
+
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
struct cattr *attr, struct dentry *hardlink)
{
@@ -250,7 +266,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (upper->d_parent->d_inode != udir)
goto out_unlock;
- opaquedir = ovl_lookup_temp(workdir, dentry);
+ opaquedir = ovl_lookup_temp(workdir);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir))
goto out_unlock;
@@ -382,7 +398,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (err)
goto out;
- newdentry = ovl_lookup_temp(workdir, dentry);
+ newdentry = ovl_lookup_temp(workdir);
err = PTR_ERR(newdentry);
if (IS_ERR(newdentry))
goto out_unlock;
@@ -846,18 +862,16 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
if (IS_ERR(redirect))
return PTR_ERR(redirect);
- err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT,
- redirect, strlen(redirect), 0);
+ err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
+ OVL_XATTR_REDIRECT,
+ redirect, strlen(redirect), -EXDEV);
if (!err) {
spin_lock(&dentry->d_lock);
ovl_dentry_set_redirect(dentry, redirect);
spin_unlock(&dentry->d_lock);
} else {
kfree(redirect);
- if (err == -EOPNOTSUPP)
- ovl_clear_redirect_dir(dentry->d_sb);
- else
- pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
+ pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
/* Fall back to userspace copy-up */
err = -EXDEV;
}
@@ -943,6 +957,25 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
old_upperdir = ovl_dentry_upper(old->d_parent);
new_upperdir = ovl_dentry_upper(new->d_parent);
+ if (!samedir) {
+ /*
+ * When moving a merge dir or non-dir with copy up origin into
+ * a new parent, we are marking the new parent dir "impure".
+ * When ovl_iterate() iterates an "impure" upper dir, it will
+ * lookup the origin inodes of the entries to fill d_ino.
+ */
+ if (ovl_type_origin(old)) {
+ err = ovl_set_impure(new->d_parent, new_upperdir);
+ if (err)
+ goto out_revert_creds;
+ }
+ if (!overwrite && ovl_type_origin(new)) {
+ err = ovl_set_impure(old->d_parent, old_upperdir);
+ if (err)
+ goto out_revert_creds;
+ }
+ }
+
trap = lock_rename(new_upperdir, old_upperdir);
olddentry = lookup_one_len(old->d_name.name, old_upperdir,
@@ -992,7 +1025,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (ovl_type_merge_or_lower(old))
err = ovl_set_redirect(old, samedir);
else if (!old_opaque && ovl_type_merge(new->d_parent))
- err = ovl_set_opaque(old, olddentry);
+ err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
if (err)
goto out_dput;
}
@@ -1000,7 +1033,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (ovl_type_merge_or_lower(new))
err = ovl_set_redirect(new, samedir);
else if (!new_opaque && ovl_type_merge(old->d_parent))
- err = ovl_set_opaque(new, newdentry);
+ err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
if (err)
goto out_dput;
}
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index ad9547f82da5..d613e2c41242 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -240,6 +240,16 @@ int ovl_xattr_get(struct dentry *dentry, const char *name,
return res;
}
+static bool ovl_can_list(const char *s)
+{
+ /* List all non-trusted xatts */
+ if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
+ return true;
+
+ /* Never list trusted.overlay, list other trusted for superuser only */
+ return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
+}
+
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
struct dentry *realdentry = ovl_dentry_real(dentry);
@@ -263,7 +273,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
return -EIO;
len -= slen;
- if (ovl_is_private_xattr(s)) {
+ if (!ovl_can_list(s)) {
res -= slen;
memmove(s, s + slen, len);
} else {
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index bad0f665a635..f3136c31e72a 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -169,17 +169,7 @@ invalid:
static bool ovl_is_opaquedir(struct dentry *dentry)
{
- int res;
- char val;
-
- if (!d_is_dir(dentry))
- return false;
-
- res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1);
- if (res == 1 && val == 'y')
- return true;
-
- return false;
+ return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
}
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
@@ -351,6 +341,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int ctr = 0;
struct inode *inode = NULL;
bool upperopaque = false;
+ bool upperimpure = false;
char *upperredirect = NULL;
struct dentry *this;
unsigned int i;
@@ -395,6 +386,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
poe = roe;
}
upperopaque = d.opaque;
+ if (upperdentry && d.is_dir)
+ upperimpure = ovl_is_impuredir(upperdentry);
}
if (!d.stop && poe->numlower) {
@@ -463,6 +456,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
revert_creds(old_cred);
oe->opaque = upperopaque;
+ oe->impure = upperimpure;
oe->redirect = upperredirect;
oe->__upperdentry = upperdentry;
memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index caa36cb9c46d..0623cebeefff 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -24,6 +24,7 @@ enum ovl_path_type {
#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
+#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
@@ -203,10 +204,10 @@ struct dentry *ovl_dentry_real(struct dentry *dentry);
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
bool ovl_dentry_is_opaque(struct dentry *dentry);
+bool ovl_dentry_is_impure(struct dentry *dentry);
bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
bool ovl_redirect_dir(struct super_block *sb);
-void ovl_clear_redirect_dir(struct super_block *sb);
const char *ovl_dentry_get_redirect(struct dentry *dentry);
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
@@ -219,6 +220,17 @@ bool ovl_is_whiteout(struct dentry *dentry);
struct file *ovl_path_open(struct path *path, int flags);
int ovl_copy_up_start(struct dentry *dentry);
void ovl_copy_up_end(struct dentry *dentry);
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+ const char *name, const void *value, size_t size,
+ int xerr);
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
+
+static inline bool ovl_is_impuredir(struct dentry *dentry)
+{
+ return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
+}
+
/* namei.c */
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
@@ -263,7 +275,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
/* dir.c */
extern const struct inode_operations ovl_dir_inode_operations;
-struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
+struct dentry *ovl_lookup_temp(struct dentry *workdir);
struct cattr {
dev_t rdev;
umode_t mode;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index b2023ddb8532..34bc4a9f5c61 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -28,6 +28,7 @@ struct ovl_fs {
/* creds of process who forced instantiation of super block */
const struct cred *creator_cred;
bool tmpfile;
+ bool noxattr;
wait_queue_head_t copyup_wq;
/* sb common to all layers */
struct super_block *same_sb;
@@ -42,6 +43,7 @@ struct ovl_entry {
u64 version;
const char *redirect;
bool opaque;
+ bool impure;
bool copying;
};
struct rcu_head rcu;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 9828b7de8999..4882ffb37bae 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -891,6 +891,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
dput(temp);
else
pr_warn("overlayfs: upper fs does not support tmpfile.\n");
+
+ /*
+ * Check if upper/work fs supports trusted.overlay.*
+ * xattr
+ */
+ err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE,
+ "0", 1, 0);
+ if (err) {
+ ufs->noxattr = true;
+ pr_warn("overlayfs: upper fs does not support xattr.\n");
+ } else {
+ vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE);
+ }
}
}
@@ -961,7 +974,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
path_put(&workpath);
kfree(lowertmp);
- oe->__upperdentry = upperpath.dentry;
+ if (upperpath.dentry) {
+ oe->__upperdentry = upperpath.dentry;
+ oe->impure = ovl_is_impuredir(upperpath.dentry);
+ }
for (i = 0; i < numlower; i++) {
oe->lowerstack[i].dentry = stack[i].dentry;
oe->lowerstack[i].mnt = ufs->lower_mnt[i];
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index cfdea47313a1..809048913889 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -175,6 +175,13 @@ bool ovl_dentry_is_opaque(struct dentry *dentry)
return oe->opaque;
}
+bool ovl_dentry_is_impure(struct dentry *dentry)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ return oe->impure;
+}
+
bool ovl_dentry_is_whiteout(struct dentry *dentry)
{
return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
@@ -191,14 +198,7 @@ bool ovl_redirect_dir(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
- return ofs->config.redirect_dir;
-}
-
-void ovl_clear_redirect_dir(struct super_block *sb)
-{
- struct ovl_fs *ofs = sb->s_fs_info;
-
- ofs->config.redirect_dir = false;
+ return ofs->config.redirect_dir && !ofs->noxattr;
}
const char *ovl_dentry_get_redirect(struct dentry *dentry)
@@ -303,3 +303,59 @@ void ovl_copy_up_end(struct dentry *dentry)
wake_up_locked(&ofs->copyup_wq);
spin_unlock(&ofs->copyup_wq.lock);
}
+
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
+{
+ int res;
+ char val;
+
+ if (!d_is_dir(dentry))
+ return false;
+
+ res = vfs_getxattr(dentry, name, &val, 1);
+ if (res == 1 && val == 'y')
+ return true;
+
+ return false;
+}
+
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+ const char *name, const void *value, size_t size,
+ int xerr)
+{
+ int err;
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+ if (ofs->noxattr)
+ return xerr;
+
+ err = ovl_do_setxattr(upperdentry, name, value, size, 0);
+
+ if (err == -EOPNOTSUPP) {
+ pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
+ ofs->noxattr = true;
+ return xerr;
+ }
+
+ return err;
+}
+
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
+{
+ int err;
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ if (oe->impure)
+ return 0;
+
+ /*
+ * Do not fail when upper doesn't support xattrs.
+ * Upper inodes won't have origin nor redirect xattr anyway.
+ */
+ err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
+ "y", 1, 0);
+ if (!err)
+ oe->impure = true;
+
+ return err;
+}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 45f6bf68fff3..f1e1927ccd48 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -821,7 +821,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
if (!mmget_not_zero(mm))
goto free;
- flags = write ? FOLL_WRITE : 0;
+ flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
while (count > 0) {
int this_len = min_t(int, count, PAGE_SIZE);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ebf80c7739e1..48813aeaab80 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1512,6 +1512,22 @@ int dquot_initialize(struct inode *inode)
}
EXPORT_SYMBOL(dquot_initialize);
+bool dquot_initialize_needed(struct inode *inode)
+{
+ struct dquot **dquots;
+ int i;
+
+ if (!dquot_active(inode))
+ return false;
+
+ dquots = i_dquot(inode);
+ for (i = 0; i < MAXQUOTAS; i++)
+ if (!dquots[i] && sb_has_quota_active(inode->i_sb, i))
+ return true;
+ return false;
+}
+EXPORT_SYMBOL(dquot_initialize_needed);
+
/*
* Release all quotas referenced by inode.
*
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index da01f497180a..39bb1e838d8d 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1112,7 +1112,7 @@ static int flush_commit_list(struct super_block *s,
depth = reiserfs_write_unlock_nested(s);
if (reiserfs_barrier_flush(s))
__sync_dirty_buffer(jl->j_commit_bh,
- REQ_PREFLUSH | REQ_FUA);
+ REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
else
sync_dirty_buffer(jl->j_commit_bh);
reiserfs_write_lock_nested(s, depth);
@@ -1271,7 +1271,7 @@ static int _update_journal_header_block(struct super_block *sb,
if (reiserfs_barrier_flush(sb))
__sync_dirty_buffer(journal->j_header_bh,
- REQ_PREFLUSH | REQ_FUA);
+ REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
else
sync_dirty_buffer(journal->j_header_bh);
diff --git a/fs/stat.c b/fs/stat.c
index f494b182c7c7..c35610845ab1 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -672,6 +672,7 @@ void __inode_add_bytes(struct inode *inode, loff_t bytes)
inode->i_bytes -= 512;
}
}
+EXPORT_SYMBOL(__inode_add_bytes);
void inode_add_bytes(struct inode *inode, loff_t bytes)
{
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index a0376a2c1c29..d642cc0a8271 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -82,7 +82,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
ufs_error (sb, "ufs_free_fragments",
"bit already cleared for fragment %u", i);
}
-
+
+ inode_sub_bytes(inode, count << uspi->s_fshift);
fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
uspi->cs_total.cs_nffree += count;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
@@ -184,6 +185,7 @@ do_more:
ufs_error(sb, "ufs_free_blocks", "freeing free fragment");
}
ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
+ inode_sub_bytes(inode, uspi->s_fpb << uspi->s_fshift);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
ufs_clusteracct (sb, ucpi, blkno, 1);
@@ -494,6 +496,20 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
return 0;
}
+static bool try_add_frags(struct inode *inode, unsigned frags)
+{
+ unsigned size = frags * i_blocksize(inode);
+ spin_lock(&inode->i_lock);
+ __inode_add_bytes(inode, size);
+ if (unlikely((u32)inode->i_blocks != inode->i_blocks)) {
+ __inode_sub_bytes(inode, size);
+ spin_unlock(&inode->i_lock);
+ return false;
+ }
+ spin_unlock(&inode->i_lock);
+ return true;
+}
+
static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
unsigned oldcount, unsigned newcount)
{
@@ -530,6 +546,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
for (i = oldcount; i < newcount; i++)
if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
return 0;
+
+ if (!try_add_frags(inode, count))
+ return 0;
/*
* Block can be extended
*/
@@ -647,6 +666,7 @@ cg_found:
ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
i = uspi->s_fpb - count;
+ inode_sub_bytes(inode, i << uspi->s_fshift);
fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
uspi->cs_total.cs_nffree += i;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i);
@@ -657,6 +677,8 @@ cg_found:
result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
if (result == INVBLOCK)
return 0;
+ if (!try_add_frags(inode, count))
+ return 0;
for (i = 0; i < count; i++)
ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i);
@@ -716,6 +738,8 @@ norot:
return INVBLOCK;
ucpi->c_rotor = result;
gotit:
+ if (!try_add_frags(inode, uspi->s_fpb))
+ return 0;
blkno = ufs_fragstoblks(result);
ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7e41aee7b69a..da553ffec85b 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -235,7 +235,8 @@ ufs_extend_tail(struct inode *inode, u64 writes_to,
p = ufs_get_direct_data_ptr(uspi, ufsi, block);
tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p),
- new_size, err, locked_page);
+ new_size - (lastfrag & uspi->s_fpbmask), err,
+ locked_page);
return tmp != 0;
}
@@ -284,7 +285,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index,
goal += uspi->s_fpb;
}
tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment),
- goal, uspi->s_fpb, err, locked_page);
+ goal, nfrags, err, locked_page);
if (!tmp) {
*err = -ENOSPC;
@@ -402,7 +403,9 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff
if (!create) {
phys64 = ufs_frag_map(inode, offsets, depth);
- goto out;
+ if (phys64)
+ map_bh(bh_result, sb, phys64 + frag);
+ return 0;
}
/* This code entered only while writing ....? */
@@ -841,8 +844,11 @@ void ufs_evict_inode(struct inode * inode)
truncate_inode_pages_final(&inode->i_data);
if (want_delete) {
inode->i_size = 0;
- if (inode->i_blocks)
+ if (inode->i_blocks &&
+ (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)))
ufs_truncate_blocks(inode);
+ ufs_update_inode(inode, inode_needs_sync(inode));
}
invalidate_inode_buffers(inode);
@@ -1100,7 +1106,7 @@ out:
return err;
}
-static void __ufs_truncate_blocks(struct inode *inode)
+static void ufs_truncate_blocks(struct inode *inode)
{
struct ufs_inode_info *ufsi = UFS_I(inode);
struct super_block *sb = inode->i_sb;
@@ -1183,7 +1189,7 @@ static int ufs_truncate(struct inode *inode, loff_t size)
truncate_setsize(inode, size);
- __ufs_truncate_blocks(inode);
+ ufs_truncate_blocks(inode);
inode->i_mtime = inode->i_ctime = current_time(inode);
mark_inode_dirty(inode);
out:
@@ -1191,16 +1197,6 @@ out:
return err;
}
-static void ufs_truncate_blocks(struct inode *inode)
-{
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return;
- __ufs_truncate_blocks(inode);
-}
-
int ufs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 131b2b77c818..878cc6264f1a 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -746,6 +746,23 @@ static void ufs_put_super(struct super_block *sb)
return;
}
+static u64 ufs_max_bytes(struct super_block *sb)
+{
+ struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
+ int bits = uspi->s_apbshift;
+ u64 res;
+
+ if (bits > 21)
+ res = ~0ULL;
+ else
+ res = UFS_NDADDR + (1LL << bits) + (1LL << (2*bits)) +
+ (1LL << (3*bits));
+
+ if (res >= (MAX_LFS_FILESIZE >> uspi->s_bshift))
+ return MAX_LFS_FILESIZE;
+ return res << uspi->s_bshift;
+}
+
static int ufs_fill_super(struct super_block *sb, void *data, int silent)
{
struct ufs_sb_info * sbi;
@@ -812,9 +829,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
uspi->s_dirblksize = UFS_SECTOR_SIZE;
super_block_offset=UFS_SBLOCK;
- /* Keep 2Gig file limit. Some UFS variants need to override
- this but as I don't know which I'll let those in the know loosen
- the rules */
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+
switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) {
case UFS_MOUNT_UFSTYPE_44BSD:
UFSD("ufstype=44bsd\n");
@@ -1212,6 +1228,7 @@ magic_found:
"fast symlink size (%u)\n", uspi->s_maxsymlinklen);
uspi->s_maxsymlinklen = maxsymlen;
}
+ sb->s_maxbytes = ufs_max_bytes(sb);
sb->s_max_links = UFS_LINK_MAX;
inode = ufs_iget(sb, UFS_ROOTINO);
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index b7fbf53dbc81..398019fb1448 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -473,15 +473,19 @@ static inline unsigned _ubh_find_last_zero_bit_(
static inline int _ubh_isblockset_(struct ufs_sb_private_info * uspi,
struct ufs_buffer_head * ubh, unsigned begin, unsigned block)
{
+ u8 mask;
switch (uspi->s_fpb) {
case 8:
return (*ubh_get_addr (ubh, begin + block) == 0xff);
case 4:
- return (*ubh_get_addr (ubh, begin + (block >> 1)) == (0x0f << ((block & 0x01) << 2)));
+ mask = 0x0f << ((block & 0x01) << 2);
+ return (*ubh_get_addr (ubh, begin + (block >> 1)) & mask) == mask;
case 2:
- return (*ubh_get_addr (ubh, begin + (block >> 2)) == (0x03 << ((block & 0x03) << 1)));
+ mask = 0x03 << ((block & 0x03) << 1);
+ return (*ubh_get_addr (ubh, begin + (block >> 2)) & mask) == mask;
case 1:
- return (*ubh_get_addr (ubh, begin + (block >> 3)) == (0x01 << (block & 0x07)));
+ mask = 0x01 << (block & 0x07);
+ return (*ubh_get_addr (ubh, begin + (block >> 3)) & mask) == mask;
}
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f02eb7673392..a7048eafa8e6 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1280,7 +1280,6 @@ xfs_bmap_read_extents(
xfs_bmbt_rec_t *frp;
xfs_fsblock_t nextbno;
xfs_extnum_t num_recs;
- xfs_extnum_t start;
num_recs = xfs_btree_get_numrecs(block);
if (unlikely(i + num_recs > room)) {
@@ -1303,7 +1302,6 @@ xfs_bmap_read_extents(
* Copy records into the extent records.
*/
frp = XFS_BMBT_REC_ADDR(mp, block, 1);
- start = i;
for (j = 0; j < num_recs; j++, i++, frp++) {
xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
trp->l0 = be64_to_cpu(frp->l0);
@@ -2065,8 +2063,10 @@ xfs_bmap_add_extent_delay_real(
}
temp = xfs_bmap_worst_indlen(bma->ip, temp);
temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
- diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
- (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+ diff = (int)(temp + temp2 -
+ (startblockval(PREV.br_startblock) -
+ (bma->cur ?
+ bma->cur->bc_private.b.allocated : 0)));
if (diff > 0) {
error = xfs_mod_fdblocks(bma->ip->i_mount,
-((int64_t)diff), false);
@@ -2123,7 +2123,6 @@ xfs_bmap_add_extent_delay_real(
temp = da_new;
if (bma->cur)
temp += bma->cur->bc_private.b.allocated;
- ASSERT(temp <= da_old);
if (temp < da_old)
xfs_mod_fdblocks(bma->ip->i_mount,
(int64_t)(da_old - temp), false);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5392674bf893..3a673ba201aa 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4395,7 +4395,7 @@ xfs_btree_visit_blocks(
xfs_btree_readahead_ptr(cur, ptr, 1);
/* save for the next iteration of the loop */
- lptr = *ptr;
+ xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
}
/* for each buffer in the level */
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index b177ef33cd4c..82a38d86ebad 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1629,13 +1629,28 @@ xfs_refcount_recover_cow_leftovers(
if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
return -EOPNOTSUPP;
- error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+ INIT_LIST_HEAD(&debris);
+
+ /*
+ * In this first part, we use an empty transaction to gather up
+ * all the leftover CoW extents so that we can subsequently
+ * delete them. The empty transaction is used to avoid
+ * a buffer lock deadlock if there happens to be a loop in the
+ * refcountbt because we're allowed to re-grab a buffer that is
+ * already attached to our transaction. When we're done
+ * recording the CoW debris we cancel the (empty) transaction
+ * and everything goes away cleanly.
+ */
+ error = xfs_trans_alloc_empty(mp, &tp);
if (error)
return error;
- cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+ if (error)
+ goto out_trans;
+ cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
/* Find all the leftover CoW staging extents. */
- INIT_LIST_HEAD(&debris);
memset(&low, 0, sizeof(low));
memset(&high, 0, sizeof(high));
low.rc.rc_startblock = XFS_REFC_COW_START;
@@ -1645,10 +1660,11 @@ xfs_refcount_recover_cow_leftovers(
if (error)
goto out_cursor;
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
- xfs_buf_relse(agbp);
+ xfs_trans_brelse(tp, agbp);
+ xfs_trans_cancel(tp);
/* Now iterate the list to free the leftovers */
- list_for_each_entry(rr, &debris, rr_list) {
+ list_for_each_entry_safe(rr, n, &debris, rr_list) {
/* Set up transaction. */
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
if (error)
@@ -1676,8 +1692,16 @@ xfs_refcount_recover_cow_leftovers(
error = xfs_trans_commit(tp);
if (error)
goto out_free;
+
+ list_del(&rr->rr_list);
+ kmem_free(rr);
}
+ return error;
+out_defer:
+ xfs_defer_cancel(&dfops);
+out_trans:
+ xfs_trans_cancel(tp);
out_free:
/* Free the leftover list */
list_for_each_entry_safe(rr, n, &debris, rr_list) {
@@ -1688,11 +1712,6 @@ out_free:
out_cursor:
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- xfs_buf_relse(agbp);
- goto out_free;
-
-out_defer:
- xfs_defer_cancel(&dfops);
- xfs_trans_cancel(tp);
- goto out_free;
+ xfs_trans_brelse(tp, agbp);
+ goto out_trans;
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2b954308a1d6..9e3cc2146d5b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -582,9 +582,13 @@ xfs_getbmap(
}
break;
default:
+ /* Local format data forks report no extents. */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ bmv->bmv_entries = 0;
+ return 0;
+ }
if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
- ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+ ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
return -EINVAL;
if (xfs_get_extsz_hint(ip) ||
@@ -712,7 +716,7 @@ xfs_getbmap(
* extents.
*/
if (map[i].br_startblock == DELAYSTARTBLOCK &&
- map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
+ map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
ASSERT((iflags & BMV_IF_DELALLOC) != 0);
if (map[i].br_startblock == HOLESTARTBLOCK &&
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 62fa39276a24..07b77b73b024 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -97,12 +97,16 @@ static inline void
xfs_buf_ioacct_inc(
struct xfs_buf *bp)
{
- if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT))
+ if (bp->b_flags & XBF_NO_IOACCT)
return;
ASSERT(bp->b_flags & XBF_ASYNC);
- bp->b_flags |= _XBF_IN_FLIGHT;
- percpu_counter_inc(&bp->b_target->bt_io_count);
+ spin_lock(&bp->b_lock);
+ if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
+ bp->b_state |= XFS_BSTATE_IN_FLIGHT;
+ percpu_counter_inc(&bp->b_target->bt_io_count);
+ }
+ spin_unlock(&bp->b_lock);
}
/*
@@ -110,14 +114,24 @@ xfs_buf_ioacct_inc(
* freed and unaccount from the buftarg.
*/
static inline void
-xfs_buf_ioacct_dec(
+__xfs_buf_ioacct_dec(
struct xfs_buf *bp)
{
- if (!(bp->b_flags & _XBF_IN_FLIGHT))
- return;
+ ASSERT(spin_is_locked(&bp->b_lock));
- bp->b_flags &= ~_XBF_IN_FLIGHT;
- percpu_counter_dec(&bp->b_target->bt_io_count);
+ if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
+ bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
+ percpu_counter_dec(&bp->b_target->bt_io_count);
+ }
+}
+
+static inline void
+xfs_buf_ioacct_dec(
+ struct xfs_buf *bp)
+{
+ spin_lock(&bp->b_lock);
+ __xfs_buf_ioacct_dec(bp);
+ spin_unlock(&bp->b_lock);
}
/*
@@ -149,9 +163,9 @@ xfs_buf_stale(
* unaccounted (released to LRU) before that occurs. Drop in-flight
* status now to preserve accounting consistency.
*/
- xfs_buf_ioacct_dec(bp);
-
spin_lock(&bp->b_lock);
+ __xfs_buf_ioacct_dec(bp);
+
atomic_set(&bp->b_lru_ref, 0);
if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
(list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
@@ -979,12 +993,12 @@ xfs_buf_rele(
* ensures the decrement occurs only once per-buf.
*/
if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
- xfs_buf_ioacct_dec(bp);
+ __xfs_buf_ioacct_dec(bp);
goto out_unlock;
}
/* the last reference has been dropped ... */
- xfs_buf_ioacct_dec(bp);
+ __xfs_buf_ioacct_dec(bp);
if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
/*
* If the buffer is added to the LRU take a new reference to the
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 8d1d44f87ce9..1508121f29f2 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -63,7 +63,6 @@ typedef enum {
#define _XBF_KMEM (1 << 21)/* backed by heap memory */
#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
#define _XBF_COMPOUND (1 << 23)/* compound buffer */
-#define _XBF_IN_FLIGHT (1 << 25) /* I/O in flight, for accounting purposes */
typedef unsigned int xfs_buf_flags_t;
@@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_KMEM, "KMEM" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
- { _XBF_COMPOUND, "COMPOUND" }, \
- { _XBF_IN_FLIGHT, "IN_FLIGHT" }
+ { _XBF_COMPOUND, "COMPOUND" }
/*
* Internal state flags.
*/
#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */
+#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */
/*
* The xfs_buftarg contains 2 notions of "sector size" -
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 35703a801372..5fb5a0958a14 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1043,49 +1043,17 @@ xfs_find_get_desired_pgoff(
index = startoff >> PAGE_SHIFT;
endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
- end = endoff >> PAGE_SHIFT;
+ end = (endoff - 1) >> PAGE_SHIFT;
do {
int want;
unsigned nr_pages;
unsigned int i;
- want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+ want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
want);
- /*
- * No page mapped into given range. If we are searching holes
- * and if this is the first time we got into the loop, it means
- * that the given offset is landed in a hole, return it.
- *
- * If we have already stepped through some block buffers to find
- * holes but they all contains data. In this case, the last
- * offset is already updated and pointed to the end of the last
- * mapped page, if it does not reach the endpoint to search,
- * that means there should be a hole between them.
- */
- if (nr_pages == 0) {
- /* Data search found nothing */
- if (type == DATA_OFF)
- break;
-
- ASSERT(type == HOLE_OFF);
- if (lastoff == startoff || lastoff < endoff) {
- found = true;
- *offset = lastoff;
- }
- break;
- }
-
- /*
- * At lease we found one page. If this is the first time we
- * step into the loop, and if the first page index offset is
- * greater than the given search offset, a hole was found.
- */
- if (type == HOLE_OFF && lastoff == startoff &&
- lastoff < page_offset(pvec.pages[0])) {
- found = true;
+ if (nr_pages == 0)
break;
- }
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
@@ -1098,18 +1066,18 @@ xfs_find_get_desired_pgoff(
* file mapping. However, page->index will not change
* because we have a reference on the page.
*
- * Searching done if the page index is out of range.
- * If the current offset is not reaches the end of
- * the specified search range, there should be a hole
- * between them.
+ * If current page offset is beyond where we've ended,
+ * we've found a hole.
*/
- if (page->index > end) {
- if (type == HOLE_OFF && lastoff < endoff) {
- *offset = lastoff;
- found = true;
- }
+ if (type == HOLE_OFF && lastoff < endoff &&
+ lastoff < page_offset(pvec.pages[i])) {
+ found = true;
+ *offset = lastoff;
goto out;
}
+ /* Searching done if the page index is out of range. */
+ if (page->index > end)
+ goto out;
lock_page(page);
/*
@@ -1151,21 +1119,20 @@ xfs_find_get_desired_pgoff(
/*
* The number of returned pages less than our desired, search
- * done. In this case, nothing was found for searching data,
- * but we found a hole behind the last offset.
+ * done.
*/
- if (nr_pages < want) {
- if (type == HOLE_OFF) {
- *offset = lastoff;
- found = true;
- }
+ if (nr_pages < want)
break;
- }
index = pvec.pages[i - 1]->index + 1;
pagevec_release(&pvec);
} while (index <= end);
+ /* No page at lastoff and we are not done - we found a hole. */
+ if (type == HOLE_OFF && lastoff < endoff) {
+ *offset = lastoff;
+ found = true;
+ }
out:
pagevec_release(&pvec);
return found;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 3683819887a5..814ed729881d 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -828,6 +828,7 @@ xfs_getfsmap(
struct xfs_fsmap dkeys[2]; /* per-dev keys */
struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS];
struct xfs_getfsmap_info info = { NULL };
+ bool use_rmap;
int i;
int error = 0;
@@ -837,12 +838,14 @@ xfs_getfsmap(
!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
return -EINVAL;
+ use_rmap = capable(CAP_SYS_ADMIN) &&
+ xfs_sb_version_hasrmapbt(&mp->m_sb);
head->fmh_entries = 0;
/* Set up our device handlers. */
memset(handlers, 0, sizeof(handlers));
handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
- if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ if (use_rmap)
handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
else
handlers[0].fn = xfs_getfsmap_datadev_bnobt;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index a63f61c256bd..94e5bdf7304c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1068,7 +1068,7 @@ xfs_file_iomap_begin(
/* optionally associate a dax device with the iomap bdev */
bdev = iomap->bdev;
if (blk_queue_dax(bdev->bd_queue))
- iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+ iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
else
iomap->dax_dev = NULL;
@@ -1149,7 +1149,7 @@ xfs_file_iomap_end(
unsigned flags,
struct iomap *iomap)
{
- put_dax(iomap->dax_dev);
+ fs_put_dax(iomap->dax_dev);
if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
length, written, iomap);