Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/dir.c | 1
-rw-r--r--  fs/afs/file.c | 1
-rw-r--r--  fs/afs/fs_operation.c | 7
-rw-r--r--  fs/afs/inode.c | 1
-rw-r--r--  fs/afs/internal.h | 1
-rw-r--r--  fs/afs/mntpt.c | 1
-rw-r--r--  fs/afs/write.c | 3
-rw-r--r--  fs/afs/xattr.c | 31
-rw-r--r--  fs/block_dev.c | 8
-rw-r--r--  fs/btrfs/Makefile | 10
-rw-r--r--  fs/btrfs/ctree.c | 2
-rw-r--r--  fs/btrfs/dev-replace.c | 3
-rw-r--r--  fs/btrfs/disk-io.c | 19
-rw-r--r--  fs/btrfs/extent-tree.c | 23
-rw-r--r--  fs/btrfs/extent_io.c | 33
-rw-r--r--  fs/btrfs/inode.c | 55
-rw-r--r--  fs/btrfs/qgroup.c | 12
-rw-r--r--  fs/btrfs/reada.c | 35
-rw-r--r--  fs/btrfs/tree-log.c | 8
-rw-r--r--  fs/btrfs/volumes.c | 3
-rw-r--r--  fs/cachefiles/bind.c | 6
-rw-r--r--  fs/cachefiles/rdwr.c | 7
-rw-r--r--  fs/cifs/cifs_swn.c | 2
-rw-r--r--  fs/cifs/cifsacl.c | 8
-rw-r--r--  fs/cifs/cifsglob.h | 4
-rw-r--r--  fs/cifs/cifspdu.h | 5
-rw-r--r--  fs/cifs/file.c | 1
-rw-r--r--  fs/cifs/fs_context.c | 6
-rw-r--r--  fs/cifs/inode.c | 10
-rw-r--r--  fs/cifs/smb2glob.h | 1
-rw-r--r--  fs/cifs/smb2misc.c | 4
-rw-r--r--  fs/cifs/smb2ops.c | 27
-rw-r--r--  fs/cifs/smb2transport.c | 37
-rw-r--r--  fs/cifs/transport.c | 7
-rw-r--r--  fs/ext4/balloc.c | 38
-rw-r--r--  fs/ext4/ext4.h | 3
-rw-r--r--  fs/ext4/extents.c | 2
-rw-r--r--  fs/ext4/fast_commit.c | 9
-rw-r--r--  fs/ext4/inode.c | 18
-rw-r--r--  fs/ext4/mballoc.c | 11
-rw-r--r--  fs/ext4/namei.c | 50
-rw-r--r--  fs/ext4/super.c | 7
-rw-r--r--  fs/ext4/sysfs.c | 7
-rw-r--r--  fs/ext4/verity.c | 89
-rw-r--r--  fs/ext4/xattr.c | 6
-rw-r--r--  fs/fuse/dev.c | 26
-rw-r--r--  fs/fuse/fuse_i.h | 1
-rw-r--r--  fs/fuse/virtio_fs.c | 9
-rw-r--r--  fs/gfs2/super.c | 14
-rw-r--r--  fs/io-wq.c | 44
-rw-r--r--  fs/io-wq.h | 10
-rw-r--r--  fs/io_uring.c | 371
-rw-r--r--  fs/iomap/swapfile.c | 10
-rw-r--r--  fs/locks.c | 3
-rw-r--r--  fs/nfsd/Kconfig | 1
-rw-r--r--  fs/nfsd/filecache.c | 2
-rw-r--r--  fs/nfsd/nfs4callback.c | 1
-rw-r--r--  fs/nfsd/nfs4proc.c | 2
-rw-r--r--  fs/nfsd/nfs4state.c | 55
-rw-r--r--  fs/reiserfs/xattr.h | 2
-rw-r--r--  fs/select.c | 10
-rw-r--r--  fs/squashfs/export.c | 8
-rw-r--r--  fs/squashfs/id.c | 6
-rw-r--r--  fs/squashfs/squashfs_fs.h | 1
-rw-r--r--  fs/squashfs/xattr_id.c | 6
-rw-r--r--  fs/xfs/xfs_inode.c | 14
-rw-r--r--  fs/xfs/xfs_itable.c | 6
-rw-r--r--  fs/xfs/xfs_mount.c | 90
-rw-r--r--  fs/xfs/xfs_symlink.c | 3
-rw-r--r--  fs/zonefs/super.c | 101
70 files changed, 918 insertions, 500 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 714fcca9af99..17548c1faf02 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -70,7 +70,6 @@ const struct inode_operations afs_dir_inode_operations = {
.permission = afs_permission,
.getattr = afs_getattr,
.setattr = afs_setattr,
- .listxattr = afs_listxattr,
};
const struct address_space_operations afs_dir_aops = {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 85f5adf21aa0..960b64268623 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -43,7 +43,6 @@ const struct inode_operations afs_file_inode_operations = {
.getattr = afs_getattr,
.setattr = afs_setattr,
.permission = afs_permission,
- .listxattr = afs_listxattr,
};
const struct address_space_operations afs_fs_aops = {
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index 97cab12b0a6c..71c58723763d 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -181,10 +181,13 @@ void afs_wait_for_operation(struct afs_operation *op)
if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) &&
op->ops->issue_yfs_rpc)
op->ops->issue_yfs_rpc(op);
- else
+ else if (op->ops->issue_afs_rpc)
op->ops->issue_afs_rpc(op);
+ else
+ op->ac.error = -ENOTSUPP;
- op->error = afs_wait_for_call_to_complete(op->call, &op->ac);
+ if (op->call)
+ op->error = afs_wait_for_call_to_complete(op->call, &op->ac);
}
switch (op->error) {
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1156b2df28d3..12be88716e4c 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -27,7 +27,6 @@
static const struct inode_operations afs_symlink_inode_operations = {
.get_link = page_get_link,
- .listxattr = afs_listxattr,
};
static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index b626e38e9ab5..1627b1872812 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1509,7 +1509,6 @@ extern int afs_launder_page(struct page *);
* xattr.c
*/
extern const struct xattr_handler *afs_xattr_handlers[];
-extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
/*
* yfsclient.c
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 052dab2f5c03..bbb2c210d139 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -32,7 +32,6 @@ const struct inode_operations afs_mntpt_inode_operations = {
.lookup = afs_mntpt_lookup,
.readlink = page_readlink,
.getattr = afs_getattr,
- .listxattr = afs_listxattr,
};
const struct inode_operations afs_autocell_inode_operations = {
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c9195fc67fd8..eb737ed63afb 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -851,8 +851,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
fscache_wait_on_page_write(vnode->cache, vmf->page);
#endif
- if (PageWriteback(vmf->page) &&
- wait_on_page_bit_killable(vmf->page, PG_writeback) < 0)
+ if (wait_on_page_writeback_killable(vmf->page))
return VM_FAULT_RETRY;
if (lock_page_killable(vmf->page) < 0)
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index c629caae5002..7751b0b3f81d 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -11,29 +11,6 @@
#include <linux/xattr.h>
#include "internal.h"
-static const char afs_xattr_list[] =
- "afs.acl\0"
- "afs.cell\0"
- "afs.fid\0"
- "afs.volume\0"
- "afs.yfs.acl\0"
- "afs.yfs.acl_inherited\0"
- "afs.yfs.acl_num_cleaned\0"
- "afs.yfs.vol_acl";
-
-/*
- * Retrieve a list of the supported xattrs.
- */
-ssize_t afs_listxattr(struct dentry *dentry, char *buffer, size_t size)
-{
- if (size == 0)
- return sizeof(afs_xattr_list);
- if (size < sizeof(afs_xattr_list))
- return -ERANGE;
- memcpy(buffer, afs_xattr_list, sizeof(afs_xattr_list));
- return sizeof(afs_xattr_list);
-}
-
/*
* Deal with the result of a successful fetch ACL operation.
*/
@@ -231,6 +208,8 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler,
else
ret = -ERANGE;
}
+ } else if (ret == -ENOTSUPP) {
+ ret = -ENODATA;
}
error_yacl:
@@ -256,6 +235,7 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler,
{
struct afs_operation *op;
struct afs_vnode *vnode = AFS_FS_I(inode);
+ int ret;
if (flags == XATTR_CREATE ||
strcmp(name, "acl") != 0)
@@ -270,7 +250,10 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler,
return afs_put_operation(op);
op->ops = &yfs_store_opaque_acl2_operation;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ if (ret == -ENOTSUPP)
+ ret = -ENODATA;
+ return ret;
}
static const struct xattr_handler afs_xattr_yfs_handler = {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 92ed7d5df677..09d6f7229db9 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -275,6 +275,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
bio.bi_opf = dio_bio_write_op(iocb);
task_io_account_write(ret);
}
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ bio.bi_opf |= REQ_NOWAIT;
if (iocb->ki_flags & IOCB_HIPRI)
bio_set_polled(&bio, iocb);
@@ -428,6 +430,8 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
bio->bi_opf = dio_bio_write_op(iocb);
task_io_account_write(bio->bi_iter.bi_size);
}
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ bio->bi_opf |= REQ_NOWAIT;
dio->size += bio->bi_iter.bi_size;
pos += bio->bi_iter.bi_size;
@@ -1240,13 +1244,13 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
lockdep_assert_held(&bdev->bd_mutex);
- clear_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
-
rescan:
ret = blk_drop_partitions(bdev);
if (ret)
return ret;
+ clear_bit(GD_NEED_PART_SCAN, &disk->state);
+
/*
* Historically we only set the capacity to zero for devices that
* support partitions (independ of actually having partitions created).
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index b634c42115ea..b4fb997eda16 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -7,10 +7,12 @@ subdir-ccflags-y += -Wmissing-format-attribute
subdir-ccflags-y += -Wmissing-prototypes
subdir-ccflags-y += -Wold-style-definition
subdir-ccflags-y += -Wmissing-include-dirs
-subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
-subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
-subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
-subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
+condflags := \
+ $(call cc-option, -Wunused-but-set-variable) \
+ $(call cc-option, -Wunused-const-variable) \
+ $(call cc-option, -Wpacked-not-aligned) \
+ $(call cc-option, -Wstringop-truncation)
+subdir-ccflags-y += $(condflags)
# The following turn off the warnings enabled by -Wextra
subdir-ccflags-y += -Wno-missing-field-initializers
subdir-ccflags-y += -Wno-sign-compare
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d56730a67885..34b929bd5c1a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1365,7 +1365,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
"failed to read tree block %llu from get_old_root",
logical);
} else {
+ btrfs_tree_read_lock(old);
eb = btrfs_clone_extent_buffer(old);
+ btrfs_tree_read_unlock(old);
free_extent_buffer(old);
}
} else if (old_root) {
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 3a9c1e046ebe..d05f73530af7 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -81,6 +81,9 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
struct btrfs_dev_replace_item *ptr;
u64 src_devid;
+ if (!dev_root)
+ return 0;
+
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 41b718cfea40..289f1f09481d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2387,8 +2387,9 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
} else {
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->dev_root = root;
- btrfs_init_devices_late(fs_info);
}
+ /* Initialize fs_info for all devices in any case */
+ btrfs_init_devices_late(fs_info);
/* If IGNOREDATACSUMS is set don't bother reading the csum root. */
if (!btrfs_test_opt(fs_info, IGNOREDATACSUMS)) {
@@ -3009,6 +3010,21 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
}
}
+ /*
+ * btrfs_find_orphan_roots() is responsible for finding all the dead
+ * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
+ * them into the fs_info->fs_roots_radix tree. This must be done before
+ * calling btrfs_orphan_cleanup() on the tree root. If we don't do it
+ * first, then btrfs_orphan_cleanup() will delete a dead root's orphan
+ * item before the root's tree is deleted - this means that if we unmount
+ * or crash before the deletion completes, on the next mount we will not
+ * delete what remains of the tree because the orphan item does not
+ * exist anymore, which is what tells us we have a pending deletion.
+ */
+ ret = btrfs_find_orphan_roots(fs_info);
+ if (ret)
+ goto out;
+
ret = btrfs_cleanup_fs_roots(fs_info);
if (ret)
goto out;
@@ -3068,7 +3084,6 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
}
}
- ret = btrfs_find_orphan_roots(fs_info);
out:
return ret;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 78ad31a59e59..36a3c973fda1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3323,6 +3323,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (last_ref && btrfs_header_generation(buf) == trans->transid) {
struct btrfs_block_group *cache;
+ bool must_pin = false;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ret = check_ref_cleanup(trans, buf->start);
@@ -3340,7 +3341,27 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
goto out;
}
- if (btrfs_is_zoned(fs_info)) {
+ /*
+ * If this is a leaf and there are tree mod log users, we may
+ * have recorded mod log operations that point to this leaf.
+ * So we must make sure no one reuses this leaf's extent before
+ * mod log operations are applied to a node, otherwise after
+ * rewinding a node using the mod log operations we get an
+ * inconsistent btree, as the leaf's extent may now be used as
+ * a node or leaf for another different btree.
+ * We are safe from races here because at this point no other
+ * node or root points to this extent buffer, so if after this
+ * check a new tree mod log user joins, it will not be able to
+ * find a node pointing to this leaf and record operations that
+ * point to this leaf.
+ */
+ if (btrfs_header_level(buf) == 0) {
+ read_lock(&fs_info->tree_mod_log_lock);
+ must_pin = !list_empty(&fs_info->tree_mod_seq_list);
+ read_unlock(&fs_info->tree_mod_log_lock);
+ }
+
+ if (must_pin || btrfs_is_zoned(fs_info)) {
btrfs_redirty_list_add(trans->transaction, buf);
pin_down_extent(trans, cache, buf->start, buf->len, 1);
btrfs_put_block_group(cache);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 191e358f1322..910769d5fcdb 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2886,6 +2886,35 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
}
/*
+ * Find extent buffer for a given bytenr.
+ *
+ * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
+ * in endio context.
+ */
+static struct extent_buffer *find_extent_buffer_readpage(
+ struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
+{
+ struct extent_buffer *eb;
+
+ /*
+ * For regular sectorsize, we can use page->private to grab extent
+ * buffer
+ */
+ if (fs_info->sectorsize == PAGE_SIZE) {
+ ASSERT(PagePrivate(page) && page->private);
+ return (struct extent_buffer *)page->private;
+ }
+
+ /* For subpage case, we need to lookup buffer radix tree */
+ rcu_read_lock();
+ eb = radix_tree_lookup(&fs_info->buffer_radix,
+ bytenr >> fs_info->sectorsize_bits);
+ rcu_read_unlock();
+ ASSERT(eb);
+ return eb;
+}
+
+/*
* after a readpage IO is done, we need to:
* clear the uptodate bits on error
* set the uptodate bits if things worked
@@ -2996,7 +3025,7 @@ static void end_bio_extent_readpage(struct bio *bio)
} else {
struct extent_buffer *eb;
- eb = (struct extent_buffer *)page->private;
+ eb = find_extent_buffer_readpage(fs_info, page, start);
set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = mirror;
atomic_dec(&eb->io_pages);
@@ -3020,7 +3049,7 @@ readpage_ok:
*/
if (page->index == end_index && i_size <= end) {
u32 zero_start = max(offset_in_page(i_size),
- offset_in_page(end));
+ offset_in_page(start));
zero_user_segment(page, zero_start,
offset_in_page(end) + 1);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 35bfa0533f23..a520775949a0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3099,11 +3099,13 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
* @bio_offset: offset to the beginning of the bio (in bytes)
* @page: page where is the data to be verified
* @pgoff: offset inside the page
+ * @start: logical offset in the file
*
* The length of such check is always one sector size.
*/
static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
- u32 bio_offset, struct page *page, u32 pgoff)
+ u32 bio_offset, struct page *page, u32 pgoff,
+ u64 start)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
@@ -3130,8 +3132,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
kunmap_atomic(kaddr);
return 0;
zeroit:
- btrfs_print_data_csum_error(BTRFS_I(inode), page_offset(page) + pgoff,
- csum, csum_expected, io_bio->mirror_num);
+ btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
+ io_bio->mirror_num);
if (io_bio->device)
btrfs_dev_stat_inc_and_print(io_bio->device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
@@ -3184,7 +3186,8 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
pg_off += sectorsize, bio_offset += sectorsize) {
int ret;
- ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off);
+ ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off,
+ page_offset(page) + pg_off);
if (ret < 0)
return -EIO;
}
@@ -7910,7 +7913,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
ASSERT(pgoff < PAGE_SIZE);
if (uptodate &&
(!csum || !check_data_csum(inode, io_bio,
- bio_offset, bvec.bv_page, pgoff))) {
+ bio_offset, bvec.bv_page,
+ pgoff, start))) {
clean_io_failure(fs_info, failure_tree, io_tree,
start, bvec.bv_page,
btrfs_ino(BTRFS_I(inode)),
@@ -8169,10 +8173,6 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
- WARN_ON_ONCE(write && btrfs_is_zoned(fs_info) &&
- fs_info->max_zone_append_size &&
- bio_op(bio) != REQ_OP_ZONE_APPEND);
-
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
status = extract_ordered_extent(BTRFS_I(inode), bio,
file_offset);
@@ -9008,7 +9008,7 @@ int __init btrfs_init_cachep(void)
btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
PAGE_SIZE, PAGE_SIZE,
- SLAB_RED_ZONE, NULL);
+ SLAB_MEM_SPREAD, NULL);
if (!btrfs_free_space_bitmap_cachep)
goto fail;
@@ -9877,6 +9877,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
struct btrfs_path *path;
u64 start = ins->objectid;
u64 len = ins->offset;
+ int qgroup_released;
int ret;
memset(&stack_fi, 0, sizeof(stack_fi));
@@ -9889,16 +9890,16 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
/* Encryption and other encoding is reserved and all 0 */
- ret = btrfs_qgroup_release_data(inode, file_offset, len);
- if (ret < 0)
- return ERR_PTR(ret);
+ qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len);
+ if (qgroup_released < 0)
+ return ERR_PTR(qgroup_released);
if (trans) {
ret = insert_reserved_file_extent(trans, inode,
file_offset, &stack_fi,
- true, ret);
+ true, qgroup_released);
if (ret)
- return ERR_PTR(ret);
+ goto free_qgroup;
return trans;
}
@@ -9909,21 +9910,35 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
extent_info.file_offset = file_offset;
extent_info.extent_buf = (char *)&stack_fi;
extent_info.is_new_extent = true;
- extent_info.qgroup_reserved = ret;
+ extent_info.qgroup_reserved = qgroup_released;
extent_info.insertions = 0;
path = btrfs_alloc_path();
- if (!path)
- return ERR_PTR(-ENOMEM);
+ if (!path) {
+ ret = -ENOMEM;
+ goto free_qgroup;
+ }
ret = btrfs_replace_file_extents(&inode->vfs_inode, path, file_offset,
file_offset + len - 1, &extent_info,
&trans);
btrfs_free_path(path);
if (ret)
- return ERR_PTR(ret);
-
+ goto free_qgroup;
return trans;
+
+free_qgroup:
+ /*
+ * We have released qgroup data range at the beginning of the function,
+ * and normally qgroup_released bytes will be freed when committing
+ * transaction.
+ * But if we error out early, we have to free what we have released
+ * or we leak qgroup data reservation.
+ */
+ btrfs_qgroup_free_refroot(inode->root->fs_info,
+ inode->root->root_key.objectid, qgroup_released,
+ BTRFS_QGROUP_RSV_DATA);
+ return ERR_PTR(ret);
}
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 14ff388fd3bd..f0b9ef13153a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -226,7 +226,6 @@ static void __del_qgroup_rb(struct btrfs_fs_info *fs_info,
{
struct btrfs_qgroup_list *list;
- btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
list_del(&qgroup->dirty);
while (!list_empty(&qgroup->groups)) {
list = list_first_entry(&qgroup->groups,
@@ -243,7 +242,6 @@ static void __del_qgroup_rb(struct btrfs_fs_info *fs_info,
list_del(&list->next_member);
kfree(list);
}
- kfree(qgroup);
}
/* must be called with qgroup_lock held */
@@ -569,6 +567,8 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
qgroup = rb_entry(n, struct btrfs_qgroup, node);
rb_erase(n, &fs_info->qgroup_tree);
__del_qgroup_rb(fs_info, qgroup);
+ btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
+ kfree(qgroup);
}
/*
* We call btrfs_free_qgroup_config() when unmounting
@@ -1578,6 +1578,14 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
spin_lock(&fs_info->qgroup_lock);
del_qgroup_rb(fs_info, qgroupid);
spin_unlock(&fs_info->qgroup_lock);
+
+ /*
+ * Remove the qgroup from sysfs now without holding the qgroup_lock
+ * spinlock, since the sysfs_remove_group() function needs to take
+ * the mutex kernfs_mutex through kernfs_remove_by_name_ns().
+ */
+ btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
+ kfree(qgroup);
out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 20fd4aa48a8c..06713a8fe26b 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -209,7 +209,7 @@ int btree_readahead_hook(struct extent_buffer *eb, int err)
/* find extent */
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree,
- eb->start >> PAGE_SHIFT);
+ eb->start >> fs_info->sectorsize_bits);
if (re)
re->refcnt++;
spin_unlock(&fs_info->reada_lock);
@@ -240,7 +240,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
zone = NULL;
spin_lock(&fs_info->reada_lock);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
- logical >> PAGE_SHIFT, 1);
+ logical >> fs_info->sectorsize_bits, 1);
if (ret == 1 && logical >= zone->start && logical <= zone->end) {
kref_get(&zone->refcnt);
spin_unlock(&fs_info->reada_lock);
@@ -283,13 +283,13 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&dev->reada_zones,
- (unsigned long)(zone->end >> PAGE_SHIFT),
- zone);
+ (unsigned long)(zone->end >> fs_info->sectorsize_bits),
+ zone);
if (ret == -EEXIST) {
kfree(zone);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
- logical >> PAGE_SHIFT, 1);
+ logical >> fs_info->sectorsize_bits, 1);
if (ret == 1 && logical >= zone->start && logical <= zone->end)
kref_get(&zone->refcnt);
else
@@ -315,7 +315,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
u64 length;
int real_stripes;
int nzones = 0;
- unsigned long index = logical >> PAGE_SHIFT;
+ unsigned long index = logical >> fs_info->sectorsize_bits;
int dev_replace_is_ongoing;
int have_zone = 0;
@@ -497,7 +497,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
struct reada_extent *re)
{
int i;
- unsigned long index = re->logical >> PAGE_SHIFT;
+ unsigned long index = re->logical >> fs_info->sectorsize_bits;
spin_lock(&fs_info->reada_lock);
if (--re->refcnt) {
@@ -538,11 +538,12 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
static void reada_zone_release(struct kref *kref)
{
struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
+ struct btrfs_fs_info *fs_info = zone->device->fs_info;
- lockdep_assert_held(&zone->device->fs_info->reada_lock);
+ lockdep_assert_held(&fs_info->reada_lock);
radix_tree_delete(&zone->device->reada_zones,
- zone->end >> PAGE_SHIFT);
+ zone->end >> fs_info->sectorsize_bits);
kfree(zone);
}
@@ -593,7 +594,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical,
static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock)
{
int i;
- unsigned long index = zone->end >> PAGE_SHIFT;
+ unsigned long index = zone->end >> zone->device->fs_info->sectorsize_bits;
for (i = 0; i < zone->ndevs; ++i) {
struct reada_zone *peer;
@@ -628,7 +629,7 @@ static int reada_pick_zone(struct btrfs_device *dev)
(void **)&zone, index, 1);
if (ret == 0)
break;
- index = (zone->end >> PAGE_SHIFT) + 1;
+ index = (zone->end >> dev->fs_info->sectorsize_bits) + 1;
if (zone->locked) {
if (zone->elems > top_locked_elems) {
top_locked_elems = zone->elems;
@@ -709,7 +710,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev)
* plugging to speed things up
*/
ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
- dev->reada_next >> PAGE_SHIFT, 1);
+ dev->reada_next >> fs_info->sectorsize_bits, 1);
if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
ret = reada_pick_zone(dev);
if (!ret) {
@@ -718,7 +719,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev)
}
re = NULL;
ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
- dev->reada_next >> PAGE_SHIFT, 1);
+ dev->reada_next >> fs_info->sectorsize_bits, 1);
}
if (ret == 0) {
spin_unlock(&fs_info->reada_lock);
@@ -885,7 +886,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
pr_cont(" curr off %llu",
device->reada_next - zone->start);
pr_cont("\n");
- index = (zone->end >> PAGE_SHIFT) + 1;
+ index = (zone->end >> fs_info->sectorsize_bits) + 1;
}
cnt = 0;
index = 0;
@@ -910,7 +911,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
}
}
pr_cont("\n");
- index = (re->logical >> PAGE_SHIFT) + 1;
+ index = (re->logical >> fs_info->sectorsize_bits) + 1;
if (++cnt > 15)
break;
}
@@ -926,7 +927,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
if (ret == 0)
break;
if (!re->scheduled) {
- index = (re->logical >> PAGE_SHIFT) + 1;
+ index = (re->logical >> fs_info->sectorsize_bits) + 1;
continue;
}
pr_debug("re: logical %llu size %u list empty %d scheduled %d",
@@ -942,7 +943,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
}
}
pr_cont("\n");
- index = (re->logical >> PAGE_SHIFT) + 1;
+ index = (re->logical >> fs_info->sectorsize_bits) + 1;
}
spin_unlock(&fs_info->reada_lock);
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2f1acc9aea9e..92a368627791 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3169,10 +3169,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_lock(&log_root_tree->log_mutex);
- index2 = log_root_tree->log_transid % 2;
- list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
- root_log_ctx.log_transid = log_root_tree->log_transid;
-
if (btrfs_is_zoned(fs_info)) {
if (!log_root_tree->node) {
ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
@@ -3183,6 +3179,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
}
}
+ index2 = log_root_tree->log_transid % 2;
+ list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
+ root_log_ctx.log_transid = log_root_tree->log_transid;
+
/*
* Now we are safe to update the log_root_tree because we're under the
* log_mutex, and we're a current writer so we're holding the commit
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bc3b33efddc5..1c6810bbaf8b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7448,6 +7448,9 @@ static int btrfs_device_init_dev_stats(struct btrfs_device *device,
int item_size;
int i, ret, slot;
+ if (!device->fs_info->dev_root)
+ return 0;
+
key.objectid = BTRFS_DEV_STATS_OBJECTID;
key.type = BTRFS_PERSISTENT_ITEM_KEY;
key.offset = device->devid;
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index dfb14dbddf51..38bb7764b454 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -118,6 +118,12 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
cache->mnt = path.mnt;
root = path.dentry;
+ ret = -EINVAL;
+ if (mnt_user_ns(path.mnt) != &init_user_ns) {
+ pr_warn("File cache on idmapped mounts not supported");
+ goto error_unsupported;
+ }
+
/* check parameters */
ret = -EOPNOTSUPP;
if (d_is_negative(root) ||
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index e027c718ca01..8ffc40e84a59 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -24,17 +24,16 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
container_of(wait, struct cachefiles_one_read, monitor);
struct cachefiles_object *object;
struct fscache_retrieval *op = monitor->op;
- struct wait_bit_key *key = _key;
+ struct wait_page_key *key = _key;
struct page *page = wait->private;
ASSERT(key);
_enter("{%lu},%u,%d,{%p,%u}",
monitor->netfs_page->index, mode, sync,
- key->flags, key->bit_nr);
+ key->page, key->bit_nr);
- if (key->flags != &page->flags ||
- key->bit_nr != PG_locked)
+ if (key->page != page || key->bit_nr != PG_locked)
return 0;
_debug("--- monitor %p %lx ---", page, page->flags);
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index f2d730fffccb..d829b8bf833e 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -248,7 +248,7 @@ nlmsg_fail:
/*
* Try to find a matching registration for the tcon's server name and share name.
- * Calls to this funciton must be protected by cifs_swnreg_idr_mutex.
+ * Calls to this function must be protected by cifs_swnreg_idr_mutex.
* TODO Try to avoid memory allocations
*/
static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon)
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 9d29eb9660c2..d178cf85e926 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1118,7 +1118,6 @@ static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl,
/* Retain old ACEs which we can retain */
for (i = 0; i < src_num_aces; ++i) {
pntace = (struct cifs_ace *) (acl_base + size);
- pnntace = (struct cifs_ace *) (nacl_base + nsize);
if (!new_aces_set && (pntace->flags & INHERITED_ACE)) {
/* Place the new ACEs in between existing explicit and inherited */
@@ -1131,14 +1130,17 @@ static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl,
}
/* If it's any one of the ACE we're replacing, skip! */
- if ((compare_sids(&pntace->sid, &sid_unix_NFS_mode) == 0) ||
+ if (((compare_sids(&pntace->sid, &sid_unix_NFS_mode) == 0) ||
(compare_sids(&pntace->sid, pownersid) == 0) ||
(compare_sids(&pntace->sid, pgrpsid) == 0) ||
(compare_sids(&pntace->sid, &sid_everyone) == 0) ||
- (compare_sids(&pntace->sid, &sid_authusers) == 0)) {
+ (compare_sids(&pntace->sid, &sid_authusers) == 0))) {
goto next_ace;
}
+ /* update the pointer to the next ACE to populate*/
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+
nsize += cifs_copy_ace(pnntace, pntace, NULL);
num_aces++;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 31fc8695abd6..67c056a9a519 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -919,8 +919,8 @@ struct cifs_ses {
bool binding:1; /* are we binding the session? */
__u16 session_flags;
__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
- __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
- __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
+ __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE];
+ __u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE];
__u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
__u8 binding_preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 64fe5a47b5e8..9adc74bd9f8f 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -147,6 +147,11 @@
*/
#define SMB3_SIGN_KEY_SIZE (16)
+/*
+ * Size of the smb3 encryption/decryption keys
+ */
+#define SMB3_ENC_DEC_KEY_SIZE (32)
+
#define CIFS_CLIENT_CHALLENGE_SIZE (8)
#define CIFS_SERVER_CHALLENGE_SIZE (8)
#define CIFS_HMAC_MD5_HASH_SIZE (16)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 26de4329d161..042e24aad410 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -165,6 +165,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
goto posix_open_ret;
}
} else {
+ cifs_revalidate_mapping(*pinode);
cifs_fattr_to_inode(*pinode, &fattr);
}
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 892f51a21278..78889024a7ed 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -1196,9 +1196,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
pr_warn_once("Witness protocol support is experimental\n");
break;
case Opt_rootfs:
-#ifdef CONFIG_CIFS_ROOT
- ctx->rootfs = true;
+#ifndef CONFIG_CIFS_ROOT
+ cifs_dbg(VFS, "rootfs support requires CONFIG_CIFS_ROOT config option\n");
+ goto cifs_parse_mount_err;
#endif
+ ctx->rootfs = true;
break;
case Opt_posixpaths:
if (result.negated)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7c61bc9573c0..f2df4422e54a 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2395,7 +2395,7 @@ int cifs_getattr(struct user_namespace *mnt_userns, const struct path *path,
* We need to be sure that all dirty pages are written and the server
* has actual ctime, mtime and file length.
*/
- if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_SIZE)) &&
+ if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_SIZE | STATX_BLOCKS)) &&
!CIFS_CACHE_READ(CIFS_I(inode)) &&
inode->i_mapping && inode->i_mapping->nrpages != 0) {
rc = filemap_fdatawait(inode->i_mapping);
@@ -2585,6 +2585,14 @@ set_size_out:
if (rc == 0) {
cifsInode->server_eof = attrs->ia_size;
cifs_setsize(inode, attrs->ia_size);
+ /*
+ * i_blocks is not related to (i_size / i_blksize), but instead
+ * 512 byte (2**9) size is required for calculating num blocks.
+ * Until we can query the server for actual allocation size,
+ * this is best estimate we have for blocks allocated for a file
+ * Number of blocks must be rounded up so size 1 is not 0 blocks
+ */
+ inode->i_blocks = (512 - 1 + attrs->ia_size) >> 9;
/*
* The man page of truncate says if the size changed,
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 99a1951a01ec..d9a990c99121 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -58,6 +58,7 @@
#define SMB2_HMACSHA256_SIZE (32)
#define SMB2_CMACAES_SIZE (16)
#define SMB3_SIGNKEY_SIZE (16)
+#define SMB3_GCM128_CRYPTKEY_SIZE (16)
#define SMB3_GCM256_CRYPTKEY_SIZE (32)
/* Maximum buffer size value we can send with 1 credit */
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index b50164e2c88d..aac384f69f74 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -754,8 +754,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
}
}
spin_unlock(&cifs_tcp_ses_lock);
- cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
- return false;
+ cifs_dbg(FYI, "No file id matched, oplock break ignored\n");
+ return true;
}
void
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 9bae7e8deb09..f703204fb185 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2038,6 +2038,7 @@ smb2_duplicate_extents(const unsigned int xid,
{
int rc;
unsigned int ret_data_len;
+ struct inode *inode;
struct duplicate_extents_to_file dup_ext_buf;
struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink);
@@ -2054,10 +2055,21 @@ smb2_duplicate_extents(const unsigned int xid,
cifs_dbg(FYI, "Duplicate extents: src off %lld dst off %lld len %lld\n",
src_off, dest_off, len);
- rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false);
- if (rc)
- goto duplicate_extents_out;
+ inode = d_inode(trgtfile->dentry);
+ if (inode->i_size < dest_off + len) {
+ rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false);
+ if (rc)
+ goto duplicate_extents_out;
+ /*
+ * Although also could set plausible allocation size (i_blocks)
+ * here in addition to setting the file size, in reflink
+ * it is likely that the target file is sparse. Its allocation
+ * size will be queried on next revalidate, but it is important
+ * to make sure that file's cached size is updated immediately
+ */
+ cifs_setsize(inode, dest_off + len);
+ }
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid,
FSCTL_DUPLICATE_EXTENTS_TO_FILE,
@@ -4158,7 +4170,7 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
if (ses->Suid == ses_id) {
ses_enc_key = enc ? ses->smb3encryptionkey :
ses->smb3decryptionkey;
- memcpy(key, ses_enc_key, SMB3_SIGN_KEY_SIZE);
+ memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE);
spin_unlock(&cifs_tcp_ses_lock);
return 0;
}
@@ -4185,7 +4197,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
int rc = 0;
struct scatterlist *sg;
u8 sign[SMB2_SIGNATURE_SIZE] = {};
- u8 key[SMB3_SIGN_KEY_SIZE];
+ u8 key[SMB3_ENC_DEC_KEY_SIZE];
struct aead_request *req;
char *iv;
unsigned int iv_len;
@@ -4209,10 +4221,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
tfm = enc ? server->secmech.ccmaesencrypt :
server->secmech.ccmaesdecrypt;
- if (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
+ (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
rc = crypto_aead_setkey(tfm, key, SMB3_GCM256_CRYPTKEY_SIZE);
else
- rc = crypto_aead_setkey(tfm, key, SMB3_SIGN_KEY_SIZE);
+ rc = crypto_aead_setkey(tfm, key, SMB3_GCM128_CRYPTKEY_SIZE);
if (rc) {
cifs_server_dbg(VFS, "%s: Failed to set aead key %d\n", __func__, rc);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index ebccd71cc60a..e6fa76ab70be 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -298,7 +298,8 @@ static int generate_key(struct cifs_ses *ses, struct kvec label,
{
unsigned char zero = 0x0;
__u8 i[4] = {0, 0, 0, 1};
- __u8 L[4] = {0, 0, 0, 128};
+ __u8 L128[4] = {0, 0, 0, 128};
+ __u8 L256[4] = {0, 0, 1, 0};
int rc = 0;
unsigned char prfhash[SMB2_HMACSHA256_SIZE];
unsigned char *hashptr = prfhash;
@@ -354,8 +355,14 @@ static int generate_key(struct cifs_ses *ses, struct kvec label,
goto smb3signkey_ret;
}
- rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
- L, 4);
+ if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
+ (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) {
+ rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+ L256, 4);
+ } else {
+ rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+ L128, 4);
+ }
if (rc) {
cifs_server_dbg(VFS, "%s: Could not update with L\n", __func__);
goto smb3signkey_ret;
@@ -390,6 +397,9 @@ generate_smb3signingkey(struct cifs_ses *ses,
const struct derivation_triplet *ptriplet)
{
int rc;
+#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS
+ struct TCP_Server_Info *server = ses->server;
+#endif
/*
* All channels use the same encryption/decryption keys but
@@ -422,11 +432,11 @@ generate_smb3signingkey(struct cifs_ses *ses,
rc = generate_key(ses, ptriplet->encryption.label,
ptriplet->encryption.context,
ses->smb3encryptionkey,
- SMB3_SIGN_KEY_SIZE);
+ SMB3_ENC_DEC_KEY_SIZE);
rc = generate_key(ses, ptriplet->decryption.label,
ptriplet->decryption.context,
ses->smb3decryptionkey,
- SMB3_SIGN_KEY_SIZE);
+ SMB3_ENC_DEC_KEY_SIZE);
if (rc)
return rc;
}
@@ -442,14 +452,23 @@ generate_smb3signingkey(struct cifs_ses *ses,
*/
cifs_dbg(VFS, "Session Id %*ph\n", (int)sizeof(ses->Suid),
&ses->Suid);
+ cifs_dbg(VFS, "Cipher type %d\n", server->cipher_type);
cifs_dbg(VFS, "Session Key %*ph\n",
SMB2_NTLMV2_SESSKEY_SIZE, ses->auth_key.response);
cifs_dbg(VFS, "Signing Key %*ph\n",
SMB3_SIGN_KEY_SIZE, ses->smb3signingkey);
- cifs_dbg(VFS, "ServerIn Key %*ph\n",
- SMB3_SIGN_KEY_SIZE, ses->smb3encryptionkey);
- cifs_dbg(VFS, "ServerOut Key %*ph\n",
- SMB3_SIGN_KEY_SIZE, ses->smb3decryptionkey);
+ if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
+ (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) {
+ cifs_dbg(VFS, "ServerIn Key %*ph\n",
+ SMB3_GCM256_CRYPTKEY_SIZE, ses->smb3encryptionkey);
+ cifs_dbg(VFS, "ServerOut Key %*ph\n",
+ SMB3_GCM256_CRYPTKEY_SIZE, ses->smb3decryptionkey);
+ } else {
+ cifs_dbg(VFS, "ServerIn Key %*ph\n",
+ SMB3_GCM128_CRYPTKEY_SIZE, ses->smb3encryptionkey);
+ cifs_dbg(VFS, "ServerOut Key %*ph\n",
+ SMB3_GCM128_CRYPTKEY_SIZE, ses->smb3decryptionkey);
+ }
#endif
return rc;
}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 007d99437c77..c1725b55f364 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -1196,9 +1196,12 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
/*
* Compounding is never used during session establish.
*/
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP))
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
+ mutex_lock(&server->srv_mutex);
smb311_update_preauth_hash(ses, rqst[0].rq_iov,
rqst[0].rq_nvec);
+ mutex_unlock(&server->srv_mutex);
+ }
for (i = 0; i < num_rqst; i++) {
rc = wait_for_response(server, midQ[i]);
@@ -1266,7 +1269,9 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
.iov_base = resp_iov[0].iov_base,
.iov_len = resp_iov[0].iov_len
};
+ mutex_lock(&server->srv_mutex);
smb311_update_preauth_hash(ses, &iov, 1);
+ mutex_unlock(&server->srv_mutex);
}
out:
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f45f9feebe59..74a5172c2d83 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -626,27 +626,41 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
/**
* ext4_should_retry_alloc() - check if a block allocation should be retried
- * @sb: super block
- * @retries: number of attemps has been made
+ * @sb: superblock
+ * @retries: number of retry attempts made so far
*
- * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
- * it is profitable to retry the operation, this function will wait
- * for the current or committing transaction to complete, and then
- * return TRUE. We will only retry once.
+ * ext4_should_retry_alloc() is called when ENOSPC is returned while
+ * attempting to allocate blocks. If there's an indication that a pending
+ * journal transaction might free some space and allow another attempt to
+ * succeed, this function will wait for the current or committing transaction
+ * to complete and then return TRUE.
*/
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
- if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
- (*retries)++ > 1 ||
- !EXT4_SB(sb)->s_journal)
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (!sbi->s_journal)
return 0;
- smp_mb();
- if (EXT4_SB(sb)->s_mb_free_pending == 0)
+ if (++(*retries) > 3) {
+ percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit);
return 0;
+ }
+ /*
+ * if there's no indication that blocks are about to be freed it's
+ * possible we just missed a transaction commit that did so
+ */
+ smp_mb();
+ if (sbi->s_mb_free_pending == 0)
+ return ext4_has_free_clusters(sbi, 1, 0);
+
+ /*
+ * it's possible we've just missed a transaction commit here,
+ * so ignore the returned status
+ */
jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
- jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
+ (void) jbd2_journal_force_commit_nested(sbi->s_journal);
return 1;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 644fd69185d3..826a56e3bbd2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1484,6 +1484,7 @@ struct ext4_sb_info {
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
struct percpu_counter s_dirtyclusters_counter;
+ struct percpu_counter s_sra_exceeded_retry_limit;
struct blockgroup_lock *s_blockgroup_lock;
struct proc_dir_entry *s_proc;
struct kobject s_kobj;
@@ -2793,6 +2794,8 @@ void __ext4_fc_track_link(handle_t *handle, struct inode *inode,
struct dentry *dentry);
void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry);
void ext4_fc_track_link(handle_t *handle, struct dentry *dentry);
+void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
+ struct dentry *dentry);
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 77c7c8a54da7..77c84d6f1af6 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4382,7 +4382,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
{
struct inode *inode = file_inode(file);
handle_t *handle;
- int ret, ret2 = 0, ret3 = 0;
+ int ret = 0, ret2 = 0, ret3 = 0;
int retries = 0;
int depth = 0;
struct ext4_map_blocks map;
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 6c4f19b0a556..7541d0b5d706 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -513,10 +513,10 @@ void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
__ext4_fc_track_link(handle, d_inode(dentry), dentry);
}
-void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
+void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
+ struct dentry *dentry)
{
struct __track_dentry_update_args args;
- struct inode *inode = d_inode(dentry);
int ret;
args.dentry = dentry;
@@ -527,6 +527,11 @@ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
trace_ext4_fc_track_create(inode, dentry, ret);
}
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
+{
+ __ext4_fc_track_create(handle, d_inode(dentry), dentry);
+}
+
/* __track_fn for inode tracking */
static int __track_inode(struct inode *inode, void *arg, bool update)
{
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 650c5acd2f2d..0948a43f1b3d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1938,13 +1938,13 @@ static int __ext4_journalled_writepage(struct page *page,
if (!ret)
ret = err;
- if (!ext4_has_inline_data(inode))
- ext4_walk_page_buffers(NULL, page_bufs, 0, len,
- NULL, bput_one);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
out:
unlock_page(page);
out_no_pagelock:
+ if (!inline_data && page_bufs)
+ ext4_walk_page_buffers(NULL, page_bufs, 0, len,
+ NULL, bput_one);
brelse(inode_bh);
return ret;
}
@@ -5026,7 +5026,7 @@ static int ext4_do_update_inode(handle_t *handle,
struct ext4_inode_info *ei = EXT4_I(inode);
struct buffer_head *bh = iloc->bh;
struct super_block *sb = inode->i_sb;
- int err = 0, rc, block;
+ int err = 0, block;
int need_datasync = 0, set_large_file = 0;
uid_t i_uid;
gid_t i_gid;
@@ -5138,9 +5138,9 @@ static int ext4_do_update_inode(handle_t *handle,
bh->b_data);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- rc = ext4_handle_dirty_metadata(handle, NULL, bh);
- if (!err)
- err = rc;
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out_brelse;
ext4_clear_inode_state(inode, EXT4_STATE_NEW);
if (set_large_file) {
BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access");
@@ -5387,8 +5387,10 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
inode->i_gid = attr->ia_gid;
error = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
- if (unlikely(error))
+ if (unlikely(error)) {
+ ext4_fc_stop_update(inode);
return error;
+ }
}
if (attr->ia_valid & ATTR_SIZE) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 99bf091fee10..a02fadf4fc84 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2709,8 +2709,15 @@ static int ext4_mb_init_backend(struct super_block *sb)
}
if (ext4_has_feature_flex_bg(sb)) {
- /* a single flex group is supposed to be read by a single IO */
- sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex,
+ /* a single flex group is supposed to be read by a single IO.
+ * 2 ^ s_log_groups_per_flex != UINT_MAX as s_mb_prefetch is
+ * unsigned integer, so the maximum shift is 32.
+ */
+ if (sbi->s_es->s_log_groups_per_flex >= 32) {
+ ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group");
+ goto err_freesgi;
+ }
+ sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex,
BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
} else {
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 686bf982c84e..883e2a7cd4ab 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3613,6 +3613,31 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
return retval;
}
+static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
+ unsigned ino, unsigned file_type)
+{
+ struct ext4_renament old = *ent;
+ int retval = 0;
+
+ /*
+ * old->de could have moved from under us during make indexed dir,
+ * so old->de may no longer be valid and we need to find it again
+ * before resetting the old inode info.
+ */
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ if (IS_ERR(old.bh))
+ retval = PTR_ERR(old.bh);
+ if (!old.bh)
+ retval = -ENOENT;
+ if (retval) {
+ ext4_std_error(old.dir->i_sb, retval);
+ return;
+ }
+
+ ext4_setent(handle, &old, ino, file_type);
+ brelse(old.bh);
+}
+
static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
const struct qstr *d_name)
{
@@ -3774,14 +3799,14 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
*/
retval = -ENOENT;
if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
- goto end_rename;
+ goto release_bh;
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
&new.de, &new.inlined);
if (IS_ERR(new.bh)) {
retval = PTR_ERR(new.bh);
new.bh = NULL;
- goto end_rename;
+ goto release_bh;
}
if (new.bh) {
if (!new.inode) {
@@ -3798,15 +3823,13 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
- handle = NULL;
- goto end_rename;
+ goto release_bh;
}
} else {
whiteout = ext4_whiteout_for_rename(mnt_userns, &old, credits, &handle);
if (IS_ERR(whiteout)) {
retval = PTR_ERR(whiteout);
- whiteout = NULL;
- goto end_rename;
+ goto release_bh;
}
}
@@ -3850,6 +3873,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
retval = ext4_mark_inode_dirty(handle, whiteout);
if (unlikely(retval))
goto end_rename;
+
}
if (!new.bh) {
retval = ext4_add_entry(handle, new.dentry, old.inode);
@@ -3923,6 +3947,8 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
ext4_fc_track_unlink(handle, new.dentry);
__ext4_fc_track_link(handle, old.inode, new.dentry);
__ext4_fc_track_unlink(handle, old.inode, old.dentry);
+ if (whiteout)
+ __ext4_fc_track_create(handle, whiteout, old.dentry);
}
if (new.inode) {
@@ -3937,19 +3963,21 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
end_rename:
if (whiteout) {
if (retval) {
- ext4_setent(handle, &old,
- old.inode->i_ino, old_file_type);
+ ext4_resetent(handle, &old,
+ old.inode->i_ino, old_file_type);
drop_nlink(whiteout);
+ ext4_orphan_add(handle, whiteout);
}
unlock_new_inode(whiteout);
+ ext4_journal_stop(handle);
iput(whiteout);
-
+ } else {
+ ext4_journal_stop(handle);
}
+release_bh:
brelse(old.dir_bh);
brelse(old.bh);
brelse(new.bh);
- if (handle)
- ext4_journal_stop(handle);
return retval;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ad34a37278cd..b9693680463a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1210,6 +1210,7 @@ static void ext4_put_super(struct super_block *sb)
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+ percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
percpu_free_rwsem(&sbi->s_writepages_rwsem);
#ifdef CONFIG_QUOTA
for (i = 0; i < EXT4_MAXQUOTAS; i++)
@@ -5012,6 +5013,9 @@ no_journal:
err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
GFP_KERNEL);
if (!err)
+ err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
+ GFP_KERNEL);
+ if (!err)
err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
if (err) {
@@ -5124,6 +5128,7 @@ failed_mount6:
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+ percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
percpu_free_rwsem(&sbi->s_writepages_rwsem);
failed_mount5:
ext4_ext_release(sb);
@@ -5149,8 +5154,8 @@ failed_mount_wq:
failed_mount3a:
ext4_es_unregister_shrinker(sbi);
failed_mount3:
- del_timer_sync(&sbi->s_err_report);
flush_work(&sbi->s_error_work);
+ del_timer_sync(&sbi->s_err_report);
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 075aa3a19ff5..a3d08276d441 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -24,6 +24,7 @@ typedef enum {
attr_session_write_kbytes,
attr_lifetime_write_kbytes,
attr_reserved_clusters,
+ attr_sra_exceeded_retry_limit,
attr_inode_readahead,
attr_trigger_test_error,
attr_first_error_time,
@@ -202,6 +203,7 @@ EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444);
EXT4_ATTR_FUNC(session_write_kbytes, 0444);
EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444);
EXT4_ATTR_FUNC(reserved_clusters, 0644);
+EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
ext4_sb_info, s_inode_readahead_blks);
@@ -251,6 +253,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(session_write_kbytes),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(reserved_clusters),
+ ATTR_LIST(sra_exceeded_retry_limit),
ATTR_LIST(inode_readahead_blks),
ATTR_LIST(inode_goal),
ATTR_LIST(mb_stats),
@@ -374,6 +377,10 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)
atomic64_read(&sbi->s_resv_clusters));
+ case attr_sra_exceeded_retry_limit:
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)
+ percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit));
case attr_inode_readahead:
case attr_pointer_ui:
if (!ptr)
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index 5b7ba8f71153..00e3cbde472e 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -201,55 +201,76 @@ static int ext4_end_enable_verity(struct file *filp, const void *desc,
struct inode *inode = file_inode(filp);
const int credits = 2; /* superblock and inode for ext4_orphan_del() */
handle_t *handle;
+ struct ext4_iloc iloc;
int err = 0;
- int err2;
- if (desc != NULL) {
- /* Succeeded; write the verity descriptor. */
- err = ext4_write_verity_descriptor(inode, desc, desc_size,
- merkle_tree_size);
-
- /* Write all pages before clearing VERITY_IN_PROGRESS. */
- if (!err)
- err = filemap_write_and_wait(inode->i_mapping);
- }
+ /*
+ * If an error already occurred (which fs/verity/ signals by passing
+ * desc == NULL), then only clean-up is needed.
+ */
+ if (desc == NULL)
+ goto cleanup;
- /* If we failed, truncate anything we wrote past i_size. */
- if (desc == NULL || err)
- ext4_truncate(inode);
+ /* Append the verity descriptor. */
+ err = ext4_write_verity_descriptor(inode, desc, desc_size,
+ merkle_tree_size);
+ if (err)
+ goto cleanup;
/*
- * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and
- * deleting the inode from the orphan list, even if something failed.
- * If everything succeeded, we'll also set the verity bit in the same
- * transaction.
+ * Write all pages (both data and verity metadata). Note that this must
+ * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages
+ * beyond i_size won't be written properly. For crash consistency, this
+ * also must happen before the verity inode flag gets persisted.
*/
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err)
+ goto cleanup;
- ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+ /*
+ * Finally, set the verity inode flag and remove the inode from the
+ * orphan list (in a single transaction).
+ */
handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
if (IS_ERR(handle)) {
- ext4_orphan_del(NULL, inode);
- return PTR_ERR(handle);
+ err = PTR_ERR(handle);
+ goto cleanup;
}
- err2 = ext4_orphan_del(handle, inode);
- if (err2)
- goto out_stop;
+ err = ext4_orphan_del(handle, inode);
+ if (err)
+ goto stop_and_cleanup;
- if (desc != NULL && !err) {
- struct ext4_iloc iloc;
+ err = ext4_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ goto stop_and_cleanup;
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (err)
- goto out_stop;
- ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
- ext4_set_inode_flags(inode, false);
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
- }
-out_stop:
+ ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
+ ext4_set_inode_flags(inode, false);
+ err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+ if (err)
+ goto stop_and_cleanup;
+
+ ext4_journal_stop(handle);
+
+ ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+ return 0;
+
+stop_and_cleanup:
ext4_journal_stop(handle);
- return err ?: err2;
+cleanup:
+ /*
+ * Verity failed to be enabled, so clean up by truncating any verity
+ * metadata that was written beyond i_size (both from cache and from
+ * disk), removing the inode from the orphan list (if it wasn't done
+ * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS.
+ */
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ ext4_truncate(inode);
+ ext4_orphan_del(NULL, inode);
+ ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+ return err;
}
static int ext4_get_verity_descriptor_location(struct inode *inode,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 372208500f4e..6c1018223c54 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1462,6 +1462,9 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
if (!ce)
return NULL;
+ WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) &&
+ !(current->flags & PF_MEMALLOC_NOFS));
+
ea_data = kvmalloc(value_len, GFP_KERNEL);
if (!ea_data) {
mb_cache_entry_put(ea_inode_cache, ce);
@@ -2327,6 +2330,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
error = -ENOSPC;
goto cleanup;
}
+ WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS));
}
error = ext4_reserve_inode_write(handle, inode, &is.iloc);
@@ -2400,7 +2404,7 @@ retry_inode:
* external inode if possible.
*/
if (ext4_has_feature_ea_inode(inode->i_sb) &&
- !i.in_inode) {
+ i.value_len && !i.in_inode) {
i.in_inode = 1;
goto retry_inode;
}
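The two WARN_ON_ONCE checks added above fire when an allocation is made under a running journal handle without PF_MEMALLOC_NOFS set, i.e. when reclaim could recurse back into the filesystem. That flag is normally set through the scoped allocation API; below is a hedged kernel-style sketch of the idiom those warnings expect, an illustrative helper rather than one of the ext4 call sites:

    #include <linux/sched/mm.h>
    #include <linux/mm.h>
    #include <linux/slab.h>

    /*
     * Sketch: wrap allocations made while a transaction is open in a
     * memalloc_nofs_save()/restore() pair, which sets PF_MEMALLOC_NOFS and
     * makes GFP_KERNEL allocations behave like GFP_NOFS within the scope.
     */
    static void *alloc_under_handle(size_t len)
    {
        unsigned int nofs_flags = memalloc_nofs_save();
        void *p = kvmalloc(len, GFP_KERNEL);

        memalloc_nofs_restore(nofs_flags);
        return p;
    }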
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c6636b4c4ccf..c0fee830a34e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2229,19 +2229,21 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- int err = -ENOTTY;
+ int res;
+ int oldfd;
+ struct fuse_dev *fud = NULL;
- if (cmd == FUSE_DEV_IOC_CLONE) {
- int oldfd;
+ if (_IOC_TYPE(cmd) != FUSE_DEV_IOC_MAGIC)
+ return -ENOTTY;
- err = -EFAULT;
- if (!get_user(oldfd, (__u32 __user *) arg)) {
+ switch (_IOC_NR(cmd)) {
+ case _IOC_NR(FUSE_DEV_IOC_CLONE):
+ res = -EFAULT;
+ if (!get_user(oldfd, (__u32 __user *)arg)) {
struct file *old = fget(oldfd);
- err = -EINVAL;
+ res = -EINVAL;
if (old) {
- struct fuse_dev *fud = NULL;
-
/*
* Check against file->f_op because CUSE
* uses the same ioctl handler.
@@ -2252,14 +2254,18 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
if (fud) {
mutex_lock(&fuse_mutex);
- err = fuse_device_clone(fud->fc, file);
+ res = fuse_device_clone(fud->fc, file);
mutex_unlock(&fuse_mutex);
}
fput(old);
}
}
+ break;
+ default:
+ res = -ENOTTY;
+ break;
}
- return err;
+ return res;
}
const struct file_operations fuse_dev_operations = {
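fuse_dev_ioctl() above now rejects any command whose _IOC_TYPE() is not FUSE_DEV_IOC_MAGIC and then switches on _IOC_NR(), so unknown ioctls keep returning -ENOTTY. A small userspace illustration of that decode-then-dispatch pattern follows; EXAMPLE_MAGIC and EXAMPLE_CMD are invented values, not the fuse definitions:

    #include <stdio.h>
    #include <stdint.h>
    #include <errno.h>
    #include <linux/ioctl.h>

    #define EXAMPLE_MAGIC 0xE5                       /* hypothetical ioctl magic */
    #define EXAMPLE_CMD   _IOR(EXAMPLE_MAGIC, 0, uint32_t)

    static long dispatch(unsigned int cmd)
    {
        if (_IOC_TYPE(cmd) != EXAMPLE_MAGIC)         /* foreign ioctl: not ours */
            return -ENOTTY;

        switch (_IOC_NR(cmd)) {
        case _IOC_NR(EXAMPLE_CMD):
            return 0;                                /* would handle the command here */
        default:
            return -ENOTTY;
        }
    }

    int main(void)
    {
        printf("known cmd -> %ld, unknown cmd -> %ld\n",
               dispatch(EXAMPLE_CMD), dispatch(0));
        return 0;
    }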
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 68cca8d4db6e..63d97a15ffde 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -863,6 +863,7 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
static inline void fuse_make_bad(struct inode *inode)
{
+ remove_inode_hash(inode);
set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
}
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 8868ac31a3c0..4ee6f734ba83 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1324,8 +1324,15 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
/* virtiofs allocates and installs its own fuse devices */
ctx->fudptr = NULL;
- if (ctx->dax)
+ if (ctx->dax) {
+ if (!fs->dax_dev) {
+ err = -EINVAL;
+			pr_err("virtio-fs: dax can't be enabled as the filesystem device does not support it\n");
+ goto err_free_fuse_devs;
+ }
ctx->dax_dev = fs->dax_dev;
+ }
err = fuse_fill_super_common(sb, ctx);
if (err < 0)
goto err_free_fuse_devs;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 97076d3f562f..8fb9602d79b4 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -162,8 +162,10 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
int error;
error = init_threads(sdp);
- if (error)
+ if (error) {
+ gfs2_withdraw_delayed(sdp);
return error;
+ }
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
if (gfs2_withdrawn(sdp)) {
@@ -750,11 +752,13 @@ void gfs2_freeze_func(struct work_struct *work)
static int gfs2_freeze(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
- int error = 0;
+ int error;
mutex_lock(&sdp->sd_freeze_mutex);
- if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
+ if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) {
+ error = -EBUSY;
goto out;
+ }
for (;;) {
if (gfs2_withdrawn(sdp)) {
@@ -795,10 +799,10 @@ static int gfs2_unfreeze(struct super_block *sb)
struct gfs2_sbd *sdp = sb->s_fs_info;
mutex_lock(&sdp->sd_freeze_mutex);
- if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
+ if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
!gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
mutex_unlock(&sdp->sd_freeze_mutex);
- return 0;
+ return -EINVAL;
}
gfs2_freeze_unlock(&sdp->sd_freeze_gh);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 0ae9ecadf295..433c4d3c3c1c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -16,7 +16,6 @@
#include <linux/rculist_nulls.h>
#include <linux/cpu.h>
#include <linux/tracehook.h>
-#include <linux/freezer.h>
#include "../kernel/sched/sched.h"
#include "io-wq.h"
@@ -386,13 +385,14 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
return NULL;
}
-static void io_flush_signals(void)
+static bool io_flush_signals(void)
{
- if (unlikely(test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))) {
- if (current->task_works)
- task_work_run();
- clear_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL);
+ if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) {
+ __set_current_state(TASK_RUNNING);
+ tracehook_notify_signal();
+ return true;
}
+ return false;
}
static void io_assign_current_work(struct io_worker *worker,
@@ -484,10 +484,12 @@ static int io_wqe_worker(void *data)
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
io_wqe_inc_running(worker);
- sprintf(buf, "iou-wrk-%d", wq->task_pid);
+ snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task_pid);
set_task_comm(current, buf);
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
+ long ret;
+
set_current_state(TASK_INTERRUPTIBLE);
loop:
raw_spin_lock_irq(&wqe->lock);
@@ -497,11 +499,18 @@ loop:
}
__io_worker_idle(wqe, worker);
raw_spin_unlock_irq(&wqe->lock);
- io_flush_signals();
- if (schedule_timeout(WORKER_IDLE_TIMEOUT))
+ if (io_flush_signals())
continue;
- if (fatal_signal_pending(current))
+ ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
+ if (signal_pending(current)) {
+ struct ksignal ksig;
+
+ if (!get_signal(&ksig))
+ continue;
break;
+ }
+ if (ret)
+ continue;
/* timed out, exit unless we're the fixed worker */
if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
!(worker->flags & IO_WORKER_F_FIXED))
@@ -702,15 +711,20 @@ static int io_wq_manager(void *data)
char buf[TASK_COMM_LEN];
int node;
- sprintf(buf, "iou-mgr-%d", wq->task_pid);
+ snprintf(buf, sizeof(buf), "iou-mgr-%d", wq->task_pid);
set_task_comm(current, buf);
do {
set_current_state(TASK_INTERRUPTIBLE);
io_wq_check_workers(wq);
schedule_timeout(HZ);
- if (fatal_signal_pending(current))
+ if (signal_pending(current)) {
+ struct ksignal ksig;
+
+ if (!get_signal(&ksig))
+ continue;
set_bit(IO_WQ_BIT_EXIT, &wq->state);
+ }
} while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
io_wq_check_workers(wq);
@@ -1057,7 +1071,11 @@ static void io_wq_destroy(struct io_wq *wq)
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
- WARN_ON_ONCE(!wq_list_empty(&wqe->work_list));
+ struct io_cb_cancel_data match = {
+ .fn = io_wq_work_match_all,
+ .cancel_all = true,
+ };
+ io_wqe_cancel_pending_work(wqe, &match);
kfree(wqe);
}
io_wq_put_hash(wq->hash);
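The sprintf() to snprintf() conversions above format the worker name into a TASK_COMM_LEN (16-byte) stack buffer, so an unexpectedly wide pid can only be truncated, never overflow the buffer. A tiny userspace demonstration of the bounded formatting; the 16 and the name prefix simply mirror the kernel values:

    #include <stdio.h>

    #define COMM_LEN 16    /* mirrors TASK_COMM_LEN */

    int main(void)
    {
        char buf[COMM_LEN];
        int pid = 1234567890;    /* unusually wide pid-like value */

        /* snprintf() truncates instead of writing past the 16-byte buffer. */
        int n = snprintf(buf, sizeof(buf), "iou-wrk-%d", pid);

        printf("wanted %d bytes, stored \"%s\"\n", n, buf);
        return 0;
    }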
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 1ac2f3248088..80d590564ff9 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -2,7 +2,6 @@
#define INTERNAL_IO_WQ_H
#include <linux/refcount.h>
-#include <linux/io_uring.h>
struct io_wq;
@@ -21,6 +20,15 @@ enum io_wq_cancel {
IO_WQ_CANCEL_NOTFOUND, /* work not found */
};
+struct io_wq_work_node {
+ struct io_wq_work_node *next;
+};
+
+struct io_wq_work_list {
+ struct io_wq_work_node *first;
+ struct io_wq_work_node *last;
+};
+
static inline void wq_list_add_after(struct io_wq_work_node *node,
struct io_wq_work_node *pos,
struct io_wq_work_list *list)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a4bce17af506..65a17d560a73 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -78,7 +78,6 @@
#include <linux/task_work.h>
#include <linux/pagemap.h>
#include <linux/io_uring.h>
-#include <linux/freezer.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -258,7 +257,8 @@ enum {
struct io_sq_data {
refcount_t refs;
- struct rw_semaphore rw_lock;
+ atomic_t park_pending;
+ struct mutex lock;
/* ctx's that are using this sqd */
struct list_head ctx_list;
@@ -273,6 +273,7 @@ struct io_sq_data {
unsigned long state;
struct completion exited;
+ struct callback_head *park_task_work;
};
#define IO_IOPOLL_BATCH 8
@@ -402,7 +403,7 @@ struct io_ring_ctx {
struct socket *ring_sock;
#endif
- struct idr io_buffer_idr;
+ struct xarray io_buffers;
struct xarray personalities;
u32 pers_next;
@@ -454,6 +455,22 @@ struct io_ring_ctx {
struct list_head tctx_list;
};
+struct io_uring_task {
+ /* submission side */
+ struct xarray xa;
+ struct wait_queue_head wait;
+ const struct io_ring_ctx *last;
+ struct io_wq *io_wq;
+ struct percpu_counter inflight;
+ atomic_t in_idle;
+ bool sqpoll;
+
+ spinlock_t task_lock;
+ struct io_wq_work_list task_list;
+ unsigned long task_state;
+ struct callback_head task_work;
+};
+
/*
* First field must be the file pointer in all the
* iocb unions! See also 'struct kiocb' in <linux/fs.h>
@@ -680,6 +697,7 @@ enum {
REQ_F_NO_FILE_TABLE_BIT,
REQ_F_LTIMEOUT_ACTIVE_BIT,
REQ_F_COMPLETE_INLINE_BIT,
+ REQ_F_REISSUE_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -723,6 +741,8 @@ enum {
REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
/* completion is deferred through io_comp_state */
REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT),
+ /* caller should reissue async */
+ REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT),
};
struct async_poll {
@@ -1077,8 +1097,6 @@ static bool io_match_task(struct io_kiocb *head,
io_for_each_link(req, head) {
if (req->flags & REQ_F_INFLIGHT)
return true;
- if (req->task->files == files)
- return true;
}
return false;
}
@@ -1135,7 +1153,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_waitqueue_head(&ctx->cq_wait);
INIT_LIST_HEAD(&ctx->cq_overflow_list);
init_completion(&ctx->ref_comp);
- idr_init(&ctx->io_buffer_idr);
+ xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
@@ -1198,7 +1216,7 @@ static void io_prep_async_work(struct io_kiocb *req)
if (req->flags & REQ_F_ISREG) {
if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
io_wq_hash_work(&req->work, file_inode(req->file));
- } else {
+ } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
if (def->unbound_nonreg_file)
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
@@ -1221,16 +1239,16 @@ static void io_queue_async_work(struct io_kiocb *req)
BUG_ON(!tctx);
BUG_ON(!tctx->io_wq);
- trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
- &req->work, req->flags);
/* init ->work of the whole link before punting */
io_prep_async_link(req);
+ trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
+ &req->work, req->flags);
io_wq_enqueue(tctx->io_wq, &req->work);
if (link)
io_queue_linked_timeout(link);
}
-static void io_kill_timeout(struct io_kiocb *req)
+static void io_kill_timeout(struct io_kiocb *req, int status)
{
struct io_timeout_data *io = req->async_data;
int ret;
@@ -1240,31 +1258,11 @@ static void io_kill_timeout(struct io_kiocb *req)
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
- io_cqring_fill_event(req, 0);
+ io_cqring_fill_event(req, status);
io_put_req_deferred(req, 1);
}
}
-/*
- * Returns true if we found and killed one or more timeouts
- */
-static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
- struct files_struct *files)
-{
- struct io_kiocb *req, *tmp;
- int canceled = 0;
-
- spin_lock_irq(&ctx->completion_lock);
- list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
- if (io_match_task(req, tsk, files)) {
- io_kill_timeout(req);
- canceled++;
- }
- }
- spin_unlock_irq(&ctx->completion_lock);
- return canceled != 0;
-}
-
static void __io_queue_deferred(struct io_ring_ctx *ctx)
{
do {
@@ -1309,7 +1307,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
break;
list_del_init(&req->timeout.list);
- io_kill_timeout(req);
+ io_kill_timeout(req, 0);
} while (!list_empty(&ctx->timeout_list));
ctx->cq_last_tm_flush = seq;
@@ -1550,14 +1548,17 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
io_put_task(req->task, 1);
list_add(&req->compl.list, &cs->locked_free_list);
cs->locked_free_nr++;
- } else
- req = NULL;
+ } else {
+ if (!percpu_ref_tryget(&ctx->refs))
+ req = NULL;
+ }
io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
- io_cqring_ev_posted(ctx);
- if (req)
+ if (req) {
+ io_cqring_ev_posted(ctx);
percpu_ref_put(&ctx->refs);
+ }
}
static void io_req_complete_state(struct io_kiocb *req, long res,
@@ -1925,17 +1926,44 @@ static int io_req_task_work_add(struct io_kiocb *req)
return ret;
}
-static void io_req_task_work_add_fallback(struct io_kiocb *req,
- task_work_func_t cb)
+static bool io_run_task_work_head(struct callback_head **work_head)
+{
+ struct callback_head *work, *next;
+ bool executed = false;
+
+ do {
+ work = xchg(work_head, NULL);
+ if (!work)
+ break;
+
+ do {
+ next = work->next;
+ work->func(work);
+ work = next;
+ cond_resched();
+ } while (work);
+ executed = true;
+ } while (1);
+
+ return executed;
+}
+
+static void io_task_work_add_head(struct callback_head **work_head,
+ struct callback_head *task_work)
{
- struct io_ring_ctx *ctx = req->ctx;
struct callback_head *head;
- init_task_work(&req->task_work, cb);
do {
- head = READ_ONCE(ctx->exit_task_work);
- req->task_work.next = head;
- } while (cmpxchg(&ctx->exit_task_work, head, &req->task_work) != head);
+ head = READ_ONCE(*work_head);
+ task_work->next = head;
+ } while (cmpxchg(work_head, head, task_work) != head);
+}
+
+static void io_req_task_work_add_fallback(struct io_kiocb *req,
+ task_work_func_t cb)
+{
+ init_task_work(&req->task_work, cb);
+ io_task_work_add_head(&req->ctx->exit_task_work, &req->task_work);
}
static void __io_req_task_cancel(struct io_kiocb *req, int error)
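io_task_work_add_head() and io_run_task_work_head() above form a small lock-free stack: producers cmpxchg entries onto a shared head pointer, and the consumer xchg()s the whole list off before walking it. A userspace C11-atomics analogue of the same pattern; struct work and the function names are stand-ins, not the io_uring types:

    #include <stdatomic.h>
    #include <stdio.h>

    struct work {
        struct work *next;
        void (*func)(struct work *);
    };

    static _Atomic(struct work *) head;

    /* Push one entry; retry if another producer slipped in between. */
    static void add_head(struct work *w)
    {
        struct work *old = atomic_load(&head);

        do {
            w->next = old;
        } while (!atomic_compare_exchange_weak(&head, &old, w));
    }

    /* Detach everything at once, then run the entries we grabbed. */
    static int run_head(void)
    {
        struct work *w = atomic_exchange(&head, NULL);
        int ran = 0;

        while (w) {
            struct work *next = w->next;

            w->func(w);
            w = next;
            ran++;
        }
        return ran;
    }

    static void hello(struct work *w) { (void)w; puts("ran one entry"); }

    int main(void)
    {
        struct work a = { .func = hello }, b = { .func = hello };

        add_head(&a);
        add_head(&b);
        printf("ran %d entries\n", run_head());
        return 0;
    }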
@@ -2451,6 +2479,11 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
return false;
return true;
}
+#else
+static bool io_rw_should_reissue(struct io_kiocb *req)
+{
+ return false;
+}
#endif
static bool io_rw_reissue(struct io_kiocb *req)
@@ -2476,13 +2509,14 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
{
int cflags = 0;
- if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_reissue(req))
+ if (req->rw.kiocb.ki_flags & IOCB_WRITE)
+ kiocb_end_write(req);
+ if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) {
+ req->flags |= REQ_F_REISSUE;
return;
+ }
if (res != req->result)
req_set_fail_links(req);
-
- if (req->rw.kiocb.ki_flags & IOCB_WRITE)
- kiocb_end_write(req);
if (req->flags & REQ_F_BUFFER_SELECTED)
cflags = io_put_rw_kbuf(req);
__io_req_complete(req, issue_flags, res, cflags);
@@ -2843,7 +2877,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
lockdep_assert_held(&req->ctx->uring_lock);
- head = idr_find(&req->ctx->io_buffer_idr, bgid);
+ head = xa_load(&req->ctx->io_buffers, bgid);
if (head) {
if (!list_empty(&head->list)) {
kbuf = list_last_entry(&head->list, struct io_buffer,
@@ -2851,7 +2885,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
list_del(&kbuf->list);
} else {
kbuf = head;
- idr_remove(&req->ctx->io_buffer_idr, bgid);
+ xa_erase(&req->ctx->io_buffers, bgid);
}
if (*len > kbuf->len)
*len = kbuf->len;
@@ -3259,11 +3293,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
ret = io_iter_do_read(req, iter);
- if (ret == -EIOCBQUEUED) {
- if (req->async_data)
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
- goto out_free;
- } else if (ret == -EAGAIN) {
+ if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
/* IOPOLL retry should happen for io-wq threads */
if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
goto done;
@@ -3273,6 +3303,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
/* some cases will consume bytes even on error returns */
iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = 0;
+ } else if (ret == -EIOCBQUEUED) {
+ goto out_free;
} else if (ret <= 0 || ret == io_size || !force_nonblock ||
(req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
/* read all, failed, already did sync or don't want to retry */
@@ -3385,6 +3417,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
else
ret2 = -EINVAL;
+ if (req->flags & REQ_F_REISSUE)
+ ret2 = -EAGAIN;
+
/*
* Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
* retry them without IOCB_NOWAIT.
@@ -3394,8 +3429,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
/* no retry on NONBLOCK nor RWF_NOWAIT */
if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT))
goto done;
- if (ret2 == -EIOCBQUEUED && req->async_data)
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
if (!force_nonblock || ret2 != -EAGAIN) {
/* IOPOLL retry should happen for io-wq threads */
if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN)
@@ -3892,7 +3925,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
}
i++;
kfree(buf);
- idr_remove(&ctx->io_buffer_idr, bgid);
+ xa_erase(&ctx->io_buffers, bgid);
return i;
}
@@ -3910,7 +3943,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
lockdep_assert_held(&ctx->uring_lock);
ret = -ENOENT;
- head = idr_find(&ctx->io_buffer_idr, p->bgid);
+ head = xa_load(&ctx->io_buffers, p->bgid);
if (head)
ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
if (ret < 0)
@@ -3930,6 +3963,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
static int io_provide_buffers_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
+ unsigned long size;
struct io_provide_buf *p = &req->pbuf;
u64 tmp;
@@ -3943,7 +3977,8 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
p->addr = READ_ONCE(sqe->addr);
p->len = READ_ONCE(sqe->len);
- if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs)))
+ size = (unsigned long)p->len * p->nbufs;
+ if (!access_ok(u64_to_user_ptr(p->addr), size))
return -EFAULT;
p->bgid = READ_ONCE(sqe->buf_group);
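The io_provide_buffers_prep() hunk above widens p->len * p->nbufs to unsigned long before the access_ok() check, so the product of two 32-bit values can no longer wrap to a small size and pass validation. A short userspace example of the widening idiom; the variable names only echo the fields above:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t len = 0x10000000, nbufs = 16;           /* 256 MiB x 16 */

        uint32_t wrapped = len * nbufs;                  /* wraps to 0 */
        uint64_t widened = (uint64_t)len * nbufs;        /* 4 GiB, as intended */

        printf("32-bit product: %u, widened product: %llu\n",
               wrapped, (unsigned long long)widened);
        return 0;
    }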
@@ -3993,21 +4028,14 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
lockdep_assert_held(&ctx->uring_lock);
- list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
+ list = head = xa_load(&ctx->io_buffers, p->bgid);
ret = io_add_buffers(p, &head);
- if (ret < 0)
- goto out;
-
- if (!list) {
- ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
- GFP_KERNEL);
- if (ret < 0) {
+ if (ret >= 0 && !list) {
+ ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
+ if (ret < 0)
__io_remove_buffers(ctx, head, p->bgid, -1U);
- goto out;
- }
}
-out:
if (ret < 0)
req_set_fail_links(req);
@@ -4345,6 +4373,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
struct io_async_msghdr iomsg, *kmsg;
struct socket *sock;
unsigned flags;
+ int min_ret = 0;
int ret;
sock = sock_from_file(req->file);
@@ -4359,12 +4388,15 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
kmsg = &iomsg;
}
- flags = req->sr_msg.msg_flags;
+ flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN)
return io_setup_async_msg(req, kmsg);
@@ -4375,7 +4407,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
if (kmsg->free_iov)
kfree(kmsg->free_iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
- if (ret < 0)
+ if (ret < min_ret)
req_set_fail_links(req);
__io_req_complete(req, issue_flags, ret, 0);
return 0;
@@ -4388,6 +4420,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
struct iovec iov;
struct socket *sock;
unsigned flags;
+ int min_ret = 0;
int ret;
sock = sock_from_file(req->file);
@@ -4403,12 +4436,15 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
msg.msg_controllen = 0;
msg.msg_namelen = 0;
- flags = req->sr_msg.msg_flags;
+ flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&msg.msg_iter);
+
msg.msg_flags = flags;
ret = sock_sendmsg(sock, &msg);
if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN)
@@ -4416,7 +4452,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
if (ret == -ERESTARTSYS)
ret = -EINTR;
- if (ret < 0)
+ if (ret < min_ret)
req_set_fail_links(req);
__io_req_complete(req, issue_flags, ret, 0);
return 0;
@@ -4568,6 +4604,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
struct socket *sock;
struct io_buffer *kbuf;
unsigned flags;
+ int min_ret = 0;
int ret, cflags = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -4593,12 +4630,15 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
1, req->sr_msg.len);
}
- flags = req->sr_msg.msg_flags;
+ flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (force_nonblock)
flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
kmsg->uaddr, flags);
if (force_nonblock && ret == -EAGAIN)
@@ -4612,7 +4652,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
if (kmsg->free_iov)
kfree(kmsg->free_iov);
req->flags &= ~REQ_F_NEED_CLEANUP;
- if (ret < 0)
+ if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
req_set_fail_links(req);
__io_req_complete(req, issue_flags, ret, cflags);
return 0;
@@ -4627,6 +4667,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
struct socket *sock;
struct iovec iov;
unsigned flags;
+ int min_ret = 0;
int ret, cflags = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -4652,12 +4693,15 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
msg.msg_iocb = NULL;
msg.msg_flags = 0;
- flags = req->sr_msg.msg_flags;
+ flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
if (flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
else if (force_nonblock)
flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&msg.msg_iter);
+
ret = sock_recvmsg(sock, &msg, flags);
if (force_nonblock && ret == -EAGAIN)
return -EAGAIN;
@@ -4666,7 +4710,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
out_free:
if (req->flags & REQ_F_BUFFER_SELECTED)
cflags = io_put_recv_kbuf(req);
- if (ret < 0)
+ if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
req_set_fail_links(req);
__io_req_complete(req, issue_flags, ret, cflags);
return 0;
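The send/recv hunks above compute min_ret from the iov length when MSG_WAITALL is requested and mark the request failed when fewer bytes than that complete (or the message was truncated). A userspace sketch of the same accounting around recv() over a socketpair; MSG_DONTWAIT is used only so the demo returns immediately with the bytes that happen to be available:

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/socket.h>

    int main(void)
    {
        int sv[2];
        char out[8] = "payload", in[16];
        ssize_t min_ret, ret;

        if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
            return 1;

        send(sv[0], out, sizeof(out), 0);

        /*
         * Mimic the logic above: when the caller asks for MSG_WAITALL, the
         * minimum acceptable return is the full requested length, and a
         * shorter transfer fails the request.
         */
        min_ret = sizeof(in);    /* the iov_iter_count() equivalent */
        ret = recv(sv[1], in, sizeof(in), MSG_DONTWAIT);

        printf("got %zd bytes, wanted at least %zd -> %s\n",
               ret, min_ret, ret < min_ret ? "fail the request" : "ok");
        return 0;
    }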
@@ -4763,7 +4807,6 @@ static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
ret = -ENOMEM;
goto out;
}
- io = req->async_data;
memcpy(req->async_data, &__io, sizeof(__io));
return -EAGAIN;
}
@@ -5526,7 +5569,8 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
data->mode = io_translate_timeout_mode(flags);
hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
- io_req_track_inflight(req);
+ if (is_timeout_link)
+ io_req_track_inflight(req);
return 0;
}
@@ -6129,6 +6173,7 @@ static void io_wq_submit_work(struct io_wq_work *work)
ret = -ECANCELED;
if (!ret) {
+ req->flags &= ~REQ_F_REISSUE;
do {
ret = io_issue_sqe(req, 0);
/*
@@ -6204,7 +6249,6 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
spin_unlock_irqrestore(&ctx->completion_lock, flags);
if (prev) {
- req_set_fail_links(prev);
io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
io_put_req_deferred(prev, 1);
} else {
@@ -6423,8 +6467,6 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
ret = io_init_req(ctx, req, sqe);
if (unlikely(ret)) {
fail_req:
- io_put_req(req);
- io_req_complete(req, ret);
if (link->head) {
/* fail even hard links since we don't submit */
link->head->flags |= REQ_F_FAIL_LINK;
@@ -6432,6 +6474,8 @@ fail_req:
io_req_complete(link->head, -ECANCELED);
link->head = NULL;
}
+ io_put_req(req);
+ io_req_complete(req, ret);
return ret;
}
ret = io_req_prep(req, sqe);
@@ -6684,7 +6728,7 @@ static int io_sq_thread(void *data)
char buf[TASK_COMM_LEN];
DEFINE_WAIT(wait);
- sprintf(buf, "iou-sqp-%d", sqd->task_pid);
+ snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
set_task_comm(current, buf);
current->pf_io_worker = NULL;
@@ -6694,22 +6738,30 @@ static int io_sq_thread(void *data)
set_cpus_allowed_ptr(current, cpu_online_mask);
current->flags |= PF_NO_SETAFFINITY;
- down_read(&sqd->rw_lock);
-
+ mutex_lock(&sqd->lock);
while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) {
int ret;
bool cap_entries, sqt_spin, needs_sched;
- if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) {
- up_read(&sqd->rw_lock);
+ if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) ||
+ signal_pending(current)) {
+ bool did_sig = false;
+
+ mutex_unlock(&sqd->lock);
+ if (signal_pending(current)) {
+ struct ksignal ksig;
+
+ did_sig = get_signal(&ksig);
+ }
cond_resched();
- down_read(&sqd->rw_lock);
+ mutex_lock(&sqd->lock);
+ if (did_sig)
+ break;
io_run_task_work();
+ io_run_task_work_head(&sqd->park_task_work);
timeout = jiffies + sqd->sq_thread_idle;
continue;
}
- if (fatal_signal_pending(current))
- break;
sqt_spin = false;
cap_entries = !list_is_singular(&sqd->ctx_list);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
@@ -6750,32 +6802,27 @@ static int io_sq_thread(void *data)
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_ring_set_wakeup_flag(ctx);
- up_read(&sqd->rw_lock);
+ mutex_unlock(&sqd->lock);
schedule();
- down_read(&sqd->rw_lock);
+ mutex_lock(&sqd->lock);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_ring_clear_wakeup_flag(ctx);
}
finish_wait(&sqd->wait, &wait);
+ io_run_task_work_head(&sqd->park_task_work);
timeout = jiffies + sqd->sq_thread_idle;
}
- up_read(&sqd->rw_lock);
- down_write(&sqd->rw_lock);
- /*
- * someone may have parked and added a cancellation task_work, run
- * it first because we don't want it in io_uring_cancel_sqpoll()
- */
- io_run_task_work();
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_uring_cancel_sqpoll(ctx);
sqd->thread = NULL;
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_ring_set_wakeup_flag(ctx);
- up_write(&sqd->rw_lock);
+ mutex_unlock(&sqd->lock);
io_run_task_work();
+ io_run_task_work_head(&sqd->park_task_work);
complete(&sqd->exited);
do_exit(0);
}
@@ -6821,7 +6868,7 @@ static int io_run_task_work_sig(void)
return 1;
if (!signal_pending(current))
return 0;
- if (test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))
+ if (test_thread_flag(TIF_NOTIFY_SIGNAL))
return -ERESTARTSYS;
return -EINTR;
}
@@ -7075,23 +7122,28 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
}
static void io_sq_thread_unpark(struct io_sq_data *sqd)
- __releases(&sqd->rw_lock)
+ __releases(&sqd->lock)
{
WARN_ON_ONCE(sqd->thread == current);
+	/*
+	 * Do the clear-then-reset dance rather than a conditional clear_bit(),
+	 * because a conditional clear would race with other threads
+	 * incrementing park_pending and setting the bit.
+	 */
clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
- up_write(&sqd->rw_lock);
+ if (atomic_dec_return(&sqd->park_pending))
+ set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+ mutex_unlock(&sqd->lock);
}
static void io_sq_thread_park(struct io_sq_data *sqd)
- __acquires(&sqd->rw_lock)
+ __acquires(&sqd->lock)
{
WARN_ON_ONCE(sqd->thread == current);
+ atomic_inc(&sqd->park_pending);
set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
- down_write(&sqd->rw_lock);
- /* set again for consistency, in case concurrent parks are happening */
- set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+ mutex_lock(&sqd->lock);
if (sqd->thread)
wake_up_process(sqd->thread);
}
@@ -7100,17 +7152,19 @@ static void io_sq_thread_stop(struct io_sq_data *sqd)
{
WARN_ON_ONCE(sqd->thread == current);
- down_write(&sqd->rw_lock);
+ mutex_lock(&sqd->lock);
set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
if (sqd->thread)
wake_up_process(sqd->thread);
- up_write(&sqd->rw_lock);
+ mutex_unlock(&sqd->lock);
wait_for_completion(&sqd->exited);
}
static void io_put_sq_data(struct io_sq_data *sqd)
{
if (refcount_dec_and_test(&sqd->refs)) {
+ WARN_ON_ONCE(atomic_read(&sqd->park_pending));
+
io_sq_thread_stop(sqd);
kfree(sqd);
}
@@ -7184,9 +7238,10 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
if (!sqd)
return ERR_PTR(-ENOMEM);
+ atomic_set(&sqd->park_pending, 0);
refcount_set(&sqd->refs, 1);
INIT_LIST_HEAD(&sqd->ctx_list);
- init_rwsem(&sqd->rw_lock);
+ mutex_init(&sqd->lock);
init_waitqueue_head(&sqd->wait);
init_completion(&sqd->exited);
return sqd;
@@ -7866,22 +7921,17 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
ret = 0;
io_sq_thread_park(sqd);
+ list_add(&ctx->sqd_list, &sqd->ctx_list);
+ io_sqd_update_thread_idle(sqd);
/* don't attach to a dying SQPOLL thread, would be racy */
- if (attached && !sqd->thread) {
+ if (attached && !sqd->thread)
ret = -ENXIO;
- } else {
- list_add(&ctx->sqd_list, &sqd->ctx_list);
- io_sqd_update_thread_idle(sqd);
- }
io_sq_thread_unpark(sqd);
- if (ret < 0) {
- io_put_sq_data(sqd);
- ctx->sq_data = NULL;
- return ret;
- } else if (attached) {
+ if (ret < 0)
+ goto err;
+ if (attached)
return 0;
- }
if (p->flags & IORING_SETUP_SQ_AFF) {
int cpu = p->sq_thread_cpu;
@@ -8332,19 +8382,13 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
return -ENXIO;
}
-static int __io_destroy_buffers(int id, void *p, void *data)
-{
- struct io_ring_ctx *ctx = data;
- struct io_buffer *buf = p;
-
- __io_remove_buffers(ctx, buf, id, -1U);
- return 0;
-}
-
static void io_destroy_buffers(struct io_ring_ctx *ctx)
{
- idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
- idr_destroy(&ctx->io_buffer_idr);
+ struct io_buffer *buf;
+ unsigned long index;
+
+ xa_for_each(&ctx->io_buffers, index, buf)
+ __io_remove_buffers(ctx, buf, index, -1U);
}
static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
@@ -8386,11 +8430,13 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
{
/*
* Some may use context even when all refs and requests have been put,
- * and they are free to do so while still holding uring_lock, see
- * __io_req_task_submit(). Wait for them to finish.
+ * and they are free to do so while still holding uring_lock or
+ * completion_lock, see __io_req_task_submit(). Wait for them to finish.
*/
mutex_lock(&ctx->uring_lock);
mutex_unlock(&ctx->uring_lock);
+ spin_lock_irq(&ctx->completion_lock);
+ spin_unlock_irq(&ctx->completion_lock);
io_sq_thread_finish(ctx);
io_sqe_buffers_unregister(ctx);
@@ -8478,26 +8524,9 @@ static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
return -EINVAL;
}
-static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
+static inline bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
{
- struct callback_head *work, *next;
- bool executed = false;
-
- do {
- work = xchg(&ctx->exit_task_work, NULL);
- if (!work)
- break;
-
- do {
- next = work->next;
- work->func(work);
- work = next;
- cond_resched();
- } while (work);
- executed = true;
- } while (1);
-
- return executed;
+ return io_run_task_work_head(&ctx->exit_task_work);
}
struct io_tctx_exit {
@@ -8529,6 +8558,14 @@ static void io_ring_exit_work(struct work_struct *work)
struct io_tctx_node *node;
int ret;
+ /* prevent SQPOLL from submitting new requests */
+ if (ctx->sq_data) {
+ io_sq_thread_park(ctx->sq_data);
+ list_del_init(&ctx->sqd_list);
+ io_sqd_update_thread_idle(ctx->sq_data);
+ io_sq_thread_unpark(ctx->sq_data);
+ }
+
/*
* If we're doing polled IO and end up having requests being
* submitted async (out-of-line), then completions can come in while
@@ -8565,6 +8602,28 @@ static void io_ring_exit_work(struct work_struct *work)
io_ring_ctx_free(ctx);
}
+/* Returns true if we found and killed one or more timeouts */
+static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
+ struct files_struct *files)
+{
+ struct io_kiocb *req, *tmp;
+ int canceled = 0;
+
+ spin_lock_irq(&ctx->completion_lock);
+ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
+ if (io_match_task(req, tsk, files)) {
+ io_kill_timeout(req, -ECANCELED);
+ canceled++;
+ }
+ }
+ if (canceled != 0)
+ io_commit_cqring(ctx);
+ spin_unlock_irq(&ctx->completion_lock);
+ if (canceled != 0)
+ io_cqring_ev_posted(ctx);
+ return canceled != 0;
+}
+
static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
{
unsigned long index;
@@ -8879,7 +8938,7 @@ static void io_sqpoll_cancel_sync(struct io_ring_ctx *ctx)
if (task) {
init_completion(&work.completion);
init_task_work(&work.task_work, io_sqpoll_cancel_cb);
- WARN_ON_ONCE(task_work_add(task, &work.task_work, TWA_SIGNAL));
+ io_task_work_add_head(&sqd->park_task_work, &work.task_work);
wake_up_process(task);
}
io_sq_thread_unpark(sqd);
@@ -8956,6 +9015,8 @@ void __io_uring_task_cancel(void)
/* make sure overflow events are dropped */
atomic_inc(&tctx->in_idle);
+ __io_uring_files_cancel(NULL);
+
do {
/* read completions before cancelations */
inflight = tctx_inflight(tctx);
diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c
index a648dbf6991e..a5e478de1417 100644
--- a/fs/iomap/swapfile.c
+++ b/fs/iomap/swapfile.c
@@ -170,6 +170,16 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
return ret;
}
+ /*
+ * If this swapfile doesn't contain even a single page-aligned
+ * contiguous range of blocks, reject this useless swapfile to
+ * prevent confusion later on.
+ */
+ if (isi.nr_pages == 0) {
+ pr_warn("swapon: Cannot find a single usable page in file.\n");
+ return -EINVAL;
+ }
+
*pagespan = 1 + isi.highest_ppage - isi.lowest_ppage;
sis->max = isi.nr_pages;
sis->pages = isi.nr_pages - 1;
diff --git a/fs/locks.c b/fs/locks.c
index 99ca97e81b7a..6125d2de39b8 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1808,9 +1808,6 @@ check_conflicting_open(struct file *filp, const long arg, int flags)
if (flags & FL_LAYOUT)
return 0;
- if (flags & FL_DELEG)
- /* We leave these checks to the caller. */
- return 0;
if (arg == F_RDLCK)
return inode_is_open_for_write(inode) ? -EAGAIN : 0;
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 821e5913faee..d6cff5fbe705 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -73,6 +73,7 @@ config NFSD_V4
select NFSD_V3
select FS_POSIX_ACL
select SUNRPC_GSS
+ select CRYPTO
select CRYPTO_MD5
select CRYPTO_SHA256
select GRACE_PERIOD
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 53fcbf79bdca..7629248fdd53 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -898,6 +898,8 @@ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
continue;
if (!nfsd_match_cred(nf->nf_cred, current_cred()))
continue;
+ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
+ continue;
if (nfsd_file_get(nf) != NULL)
return nf;
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 052be5bf9ef5..7325592b456e 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1189,6 +1189,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case -EIO:
case -ETIMEDOUT:
+ case -EACCES:
nfsd4_mark_cb_down(clp, task->tk_status);
}
break;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index acdb3cd806a1..dd9f38d072dd 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1302,7 +1302,7 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
struct nfsd_file *dst)
{
nfs42_ssc_close(src->nf_file);
- /* 'src' is freed by nfsd4_do_async_copy */
+ fput(src->nf_file);
nfsd_file_put(dst);
mntput(ss_mnt);
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 423fd6683f3a..97447a64bad0 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4940,31 +4940,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
return fl;
}
-static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
- struct nfs4_file *fp)
-{
- struct nfs4_clnt_odstate *co;
- struct file *f = fp->fi_deleg_file->nf_file;
- struct inode *ino = locks_inode(f);
- int writes = atomic_read(&ino->i_writecount);
-
- if (fp->fi_fds[O_WRONLY])
- writes--;
- if (fp->fi_fds[O_RDWR])
- writes--;
- if (writes > 0)
- return -EAGAIN;
- spin_lock(&fp->fi_lock);
- list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) {
- if (co->co_client != clp) {
- spin_unlock(&fp->fi_lock);
- return -EAGAIN;
- }
- }
- spin_unlock(&fp->fi_lock);
- return 0;
-}
-
static struct nfs4_delegation *
nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
@@ -4984,12 +4959,9 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
nf = find_readable_file(fp);
if (!nf) {
- /*
- * We probably could attempt another open and get a read
- * delegation, but for now, don't bother until the
- * client actually sends us one.
- */
- return ERR_PTR(-EAGAIN);
+ /* We should always have a readable file here */
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-EBADF);
}
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
@@ -5019,19 +4991,11 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
if (!fl)
goto out_clnt_odstate;
- status = nfsd4_check_conflicting_opens(clp, fp);
- if (status) {
- locks_free_lock(fl);
- goto out_clnt_odstate;
- }
status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL);
if (fl)
locks_free_lock(fl);
if (status)
goto out_clnt_odstate;
- status = nfsd4_check_conflicting_opens(clp, fp);
- if (status)
- goto out_clnt_odstate;
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
@@ -5113,6 +5077,17 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
goto out_no_deleg;
if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
goto out_no_deleg;
+ /*
+ * Also, if the file was opened for write or
+ * create, there's a good chance the client's
+ * about to write to it, resulting in an
+ * immediate recall (since we don't support
+ * write delegations):
+ */
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+ goto out_no_deleg;
+ if (open->op_create == NFS4_OPEN_CREATE)
+ goto out_no_deleg;
break;
default:
goto out_no_deleg;
@@ -5389,7 +5364,7 @@ nfs4_laundromat(struct nfsd_net *nn)
idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID &&
- cps->cpntf_time > cutoff)
+ cps->cpntf_time < cutoff)
_free_cpntf_state_locked(nn, cps);
}
spin_unlock(&nn->s2s_cp_lock);
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
index 9b3b06da568c..e47fde1182de 100644
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h
@@ -44,7 +44,7 @@ void reiserfs_security_free(struct reiserfs_security_handle *sec);
static inline int reiserfs_xattrs_initialized(struct super_block *sb)
{
- return REISERFS_SB(sb)->priv_root != NULL;
+ return REISERFS_SB(sb)->priv_root && REISERFS_SB(sb)->xattr_root;
}
#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
diff --git a/fs/select.c b/fs/select.c
index 37aaa8317f3a..945896d0ac9e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -1055,10 +1055,9 @@ static long do_restart_poll(struct restart_block *restart_block)
ret = do_sys_poll(ufds, nfds, to);
- if (ret == -ERESTARTNOHAND) {
- restart_block->fn = do_restart_poll;
- ret = -ERESTART_RESTARTBLOCK;
- }
+ if (ret == -ERESTARTNOHAND)
+ ret = set_restart_fn(restart_block, do_restart_poll);
+
return ret;
}
@@ -1080,7 +1079,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
struct restart_block *restart_block;
restart_block = &current->restart_block;
- restart_block->fn = do_restart_poll;
restart_block->poll.ufds = ufds;
restart_block->poll.nfds = nfds;
@@ -1091,7 +1089,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
} else
restart_block->poll.has_timeout = 0;
- ret = -ERESTART_RESTARTBLOCK;
+ ret = set_restart_fn(restart_block, do_restart_poll);
}
return ret;
}
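The poll() restart path above now goes through set_restart_fn() instead of open-coding the restart_block->fn assignment and the -ERESTART_RESTARTBLOCK return. Judging only from how it is used here, such a helper has to behave roughly like the sketch below; this is an assumption about its shape, not a copy of the kernel's definition:

    #include <linux/restart_block.h>
    #include <linux/errno.h>

    /*
     * Hedged sketch: record the restart callback and hand back the error code
     * that the syscall layer turns into a restarted call, so callers can write
     * "ret = example_set_restart_fn(restart, fn);" as in the hunks above.
     */
    static inline long example_set_restart_fn(struct restart_block *restart,
                                              long (*fn)(struct restart_block *))
    {
        restart->fn = fn;
        return -ERESTART_RESTARTBLOCK;
    }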
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c
index eb02072d28dd..723763746238 100644
--- a/fs/squashfs/export.c
+++ b/fs/squashfs/export.c
@@ -152,14 +152,18 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
start = le64_to_cpu(table[n]);
end = le64_to_cpu(table[n + 1]);
- if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) {
+		if (start >= end ||
+		    (end - start) >
+		    (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
}
start = le64_to_cpu(table[indexes - 1]);
- if (start >= lookup_table_start || (lookup_table_start - start) > SQUASHFS_METADATA_SIZE) {
+ if (start >= lookup_table_start ||
+ (lookup_table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index 11581bf31af4..ea5387679723 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c
@@ -97,14 +97,16 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb,
start = le64_to_cpu(table[n]);
end = le64_to_cpu(table[n + 1]);
- if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) {
+ if (start >= end || (end - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
}
start = le64_to_cpu(table[indexes - 1]);
- if (start >= id_table_start || (id_table_start - start) > SQUASHFS_METADATA_SIZE) {
+ if (start >= id_table_start || (id_table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 8d64edb80ebf..b3fdc8212c5f 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -17,6 +17,7 @@
/* size of metadata (inode and directory) blocks */
#define SQUASHFS_METADATA_SIZE 8192
+#define SQUASHFS_BLOCK_OFFSET 2
/* default size of block device I/O */
#ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index ead66670b41a..087cab8c78f4 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -109,14 +109,16 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
start = le64_to_cpu(table[n]);
end = le64_to_cpu(table[n + 1]);
- if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) {
+ if (start >= end || (end - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
}
start = le64_to_cpu(table[indexes - 1]);
- if (start >= table_start || (table_start - start) > SQUASHFS_METADATA_SIZE) {
+ if (start >= table_start || (table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 46a861d55e48..f93370bd7b1e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1007,9 +1007,10 @@ xfs_create(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
- XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
- &udqp, &gdqp, &pdqp);
+ error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns),
+ fsgid_into_mnt(mnt_userns), prid,
+ XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+ &udqp, &gdqp, &pdqp);
if (error)
return error;
@@ -1157,9 +1158,10 @@ xfs_create_tmpfile(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
- XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
- &udqp, &gdqp, &pdqp);
+ error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns),
+ fsgid_into_mnt(mnt_userns), prid,
+ XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+ &udqp, &gdqp, &pdqp);
if (error)
return error;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index ca310a125d1e..3498b97fb06d 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -168,6 +168,12 @@ xfs_bulkstat_one(
};
int error;
+ if (breq->mnt_userns != &init_user_ns) {
+ xfs_warn_ratelimited(breq->mp,
+ "bulkstat not supported inside of idmapped mounts.");
+ return -EINVAL;
+ }
+
ASSERT(breq->icount == 1);
bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 52370d0a3f43..1c97b155a8ee 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -635,6 +635,47 @@ xfs_check_summary_counts(
}
/*
+ * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
+ * internal inode structures can be sitting in the CIL and AIL at this point,
+ * so we need to unpin them, write them back and/or reclaim them before unmount
+ * can proceed.
+ *
+ * An inode cluster that has been freed can have its buffer still pinned in
+ * memory because the transaction is still sitting in an iclog. The stale inodes
+ * on that buffer will be pinned to the buffer until the transaction hits the
+ * disk and the callbacks run. Pushing the AIL will skip the stale inodes and
+ * may never see the pinned buffer, so nothing will push out the iclog and
+ * unpin the buffer.
+ *
+ * Hence we need to force the log to unpin everything first. However, log
+ * forces don't wait for the discards they issue to complete, so we have to
+ * explicitly wait for them to complete here as well.
+ *
+ * Then we can tell the world we are unmounting so that error handling knows
+ * that the filesystem is going away and we should error out anything that we
+ * have been retrying in the background. This will prevent never-ending
+ * retries in AIL pushing from hanging the unmount.
+ *
+ * Finally, we can push the AIL to clean all the remaining dirty objects, then
+ * reclaim the remaining inodes that are still in memory at this point in time.
+ */
+static void
+xfs_unmount_flush_inodes(
+ struct xfs_mount *mp)
+{
+ xfs_log_force(mp, XFS_LOG_SYNC);
+ xfs_extent_busy_wait_all(mp);
+ flush_workqueue(xfs_discard_wq);
+
+ mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+
+ xfs_ail_push_all_sync(mp->m_ail);
+ cancel_delayed_work_sync(&mp->m_reclaim_work);
+ xfs_reclaim_inodes(mp);
+ xfs_health_unmount(mp);
+}
+
+/*
* This function does the following on an initial mount of a file system:
* - reads the superblock from disk and init the mount struct
* - if we're a 32-bit kernel, do a size check on the superblock
@@ -1008,7 +1049,7 @@ xfs_mountfs(
/* Clean out dquots that might be in memory after quotacheck. */
xfs_qm_unmount(mp);
/*
- * Cancel all delayed reclaim work and reclaim the inodes directly.
+ * Flush all inode reclamation work and flush the log.
* We have to do this /after/ rtunmount and qm_unmount because those
* two will have scheduled delayed reclaim for the rt/quota inodes.
*
@@ -1018,11 +1059,8 @@ xfs_mountfs(
* qm_unmount_quotas and therefore rely on qm_unmount to release the
* quota inodes.
*/
- cancel_delayed_work_sync(&mp->m_reclaim_work);
- xfs_reclaim_inodes(mp);
- xfs_health_unmount(mp);
+ xfs_unmount_flush_inodes(mp);
out_log_dealloc:
- mp->m_flags |= XFS_MOUNT_UNMOUNTING;
xfs_log_mount_cancel(mp);
out_fail_wait:
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@@ -1063,47 +1101,7 @@ xfs_unmountfs(
xfs_rtunmount_inodes(mp);
xfs_irele(mp->m_rootip);
- /*
- * We can potentially deadlock here if we have an inode cluster
- * that has been freed has its buffer still pinned in memory because
- * the transaction is still sitting in a iclog. The stale inodes
- * on that buffer will be pinned to the buffer until the
- * transaction hits the disk and the callbacks run. Pushing the AIL will
- * skip the stale inodes and may never see the pinned buffer, so
- * nothing will push out the iclog and unpin the buffer. Hence we
- * need to force the log here to ensure all items are flushed into the
- * AIL before we go any further.
- */
- xfs_log_force(mp, XFS_LOG_SYNC);
-
- /*
- * Wait for all busy extents to be freed, including completion of
- * any discard operation.
- */
- xfs_extent_busy_wait_all(mp);
- flush_workqueue(xfs_discard_wq);
-
- /*
- * We now need to tell the world we are unmounting. This will allow
- * us to detect that the filesystem is going away and we should error
- * out anything that we have been retrying in the background. This will
- * prevent neverending retries in AIL pushing from hanging the unmount.
- */
- mp->m_flags |= XFS_MOUNT_UNMOUNTING;
-
- /*
- * Flush all pending changes from the AIL.
- */
- xfs_ail_push_all_sync(mp->m_ail);
-
- /*
- * Reclaim all inodes. At this point there should be no dirty inodes and
- * none should be pinned or locked. Stop background inode reclaim here
- * if it is still running.
- */
- cancel_delayed_work_sync(&mp->m_reclaim_work);
- xfs_reclaim_inodes(mp);
- xfs_health_unmount(mp);
+ xfs_unmount_flush_inodes(mp);
xfs_qm_unmount(mp);
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 1379013d74b8..7f368b10ded1 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -182,7 +182,8 @@ xfs_symlink(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
+ error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns),
+ fsgid_into_mnt(mnt_userns), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 0fe76f376dee..049e36c69ed7 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -165,6 +165,21 @@ static int zonefs_writepages(struct address_space *mapping,
return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops);
}
+static int zonefs_swap_activate(struct swap_info_struct *sis,
+ struct file *swap_file, sector_t *span)
+{
+ struct inode *inode = file_inode(swap_file);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+ if (zi->i_ztype != ZONEFS_ZTYPE_CNV) {
+ zonefs_err(inode->i_sb,
+ "swap file: not a conventional zone file\n");
+ return -EINVAL;
+ }
+
+ return iomap_swapfile_activate(sis, swap_file, span, &zonefs_iomap_ops);
+}
+
static const struct address_space_operations zonefs_file_aops = {
.readpage = zonefs_readpage,
.readahead = zonefs_readahead,
@@ -177,6 +192,7 @@ static const struct address_space_operations zonefs_file_aops = {
.is_partially_uptodate = iomap_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
.direct_IO = noop_direct_IO,
+ .swap_activate = zonefs_swap_activate,
};
static void zonefs_update_stats(struct inode *inode, loff_t new_isize)
@@ -728,6 +744,68 @@ out_release:
}
/*
+ * Do not exceed the LFS limits nor the file zone size. If pos is under the
+ * limit it becomes a short access. If it exceeds the limit, return -EFBIG.
+ */
+static loff_t zonefs_write_check_limits(struct file *file, loff_t pos,
+ loff_t count)
+{
+ struct inode *inode = file_inode(file);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ loff_t limit = rlimit(RLIMIT_FSIZE);
+ loff_t max_size = zi->i_max_size;
+
+ if (limit != RLIM_INFINITY) {
+ if (pos >= limit) {
+ send_sig(SIGXFSZ, current, 0);
+ return -EFBIG;
+ }
+ count = min(count, limit - pos);
+ }
+
+ if (!(file->f_flags & O_LARGEFILE))
+ max_size = min_t(loff_t, MAX_NON_LFS, max_size);
+
+ if (unlikely(pos >= max_size))
+ return -EFBIG;
+
+ return min(count, max_size - pos);
+}
+
+static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ loff_t count;
+
+ if (IS_SWAPFILE(inode))
+ return -ETXTBSY;
+
+ if (!iov_iter_count(from))
+ return 0;
+
+ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+ return -EINVAL;
+
+ if (iocb->ki_flags & IOCB_APPEND) {
+ if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
+ return -EINVAL;
+ mutex_lock(&zi->i_truncate_mutex);
+ iocb->ki_pos = zi->i_wpoffset;
+ mutex_unlock(&zi->i_truncate_mutex);
+ }
+
+ count = zonefs_write_check_limits(file, iocb->ki_pos,
+ iov_iter_count(from));
+ if (count < 0)
+ return count;
+
+ iov_iter_truncate(from, count);
+ return iov_iter_count(from);
+}
+
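zonefs_write_check_limits() above clamps the write count against both RLIMIT_FSIZE and the zone's maximum size: a position already past a limit fails with -EFBIG, while an in-range overshoot is shortened. A userspace sketch of the same clamping arithmetic, using plain variables in place of the zonefs inode fields:

    #include <stdio.h>
    #include <sys/resource.h>

    /* Returns the number of bytes that may be written, or -1 for "over the limit". */
    static long long clamp_write(long long pos, long long count, long long max_size)
    {
        struct rlimit rl;

        if (getrlimit(RLIMIT_FSIZE, &rl) == 0 && rl.rlim_cur != RLIM_INFINITY) {
            if (pos >= (long long)rl.rlim_cur)
                return -1;                           /* zonefs raises SIGXFSZ and -EFBIG here */
            if (count > (long long)rl.rlim_cur - pos)
                count = (long long)rl.rlim_cur - pos;
        }

        if (pos >= max_size)
            return -1;                               /* past the zone: -EFBIG */
        if (count > max_size - pos)
            count = max_size - pos;                  /* short write instead */

        return count;
    }

    int main(void)
    {
        printf("may write %lld bytes\n", clamp_write(4096, 65536, 16384));
        return 0;
    }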
+/*
* Handle direct writes. For sequential zone files, this is the only possible
* write path. For these files, check that the user is issuing writes
* sequentially from the end of the file. This code assumes that the block layer
@@ -744,8 +822,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
struct super_block *sb = inode->i_sb;
bool sync = is_sync_kiocb(iocb);
bool append = false;
- size_t count;
- ssize_t ret;
+ ssize_t ret, count;
/*
* For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
@@ -763,12 +840,11 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
}
- ret = generic_write_checks(iocb, from);
- if (ret <= 0)
+ count = zonefs_write_checks(iocb, from);
+ if (count <= 0) {
+ ret = count;
goto inode_unlock;
-
- iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos);
- count = iov_iter_count(from);
+ }
if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
ret = -EINVAL;
@@ -828,12 +904,10 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
inode_lock(inode);
}
- ret = generic_write_checks(iocb, from);
+ ret = zonefs_write_checks(iocb, from);
if (ret <= 0)
goto inode_unlock;
- iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos);
-
ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops);
if (ret > 0)
iocb->ki_pos += ret;
@@ -966,9 +1040,7 @@ static int zonefs_open_zone(struct inode *inode)
mutex_lock(&zi->i_truncate_mutex);
- zi->i_wr_refcnt++;
- if (zi->i_wr_refcnt == 1) {
-
+ if (!zi->i_wr_refcnt) {
if (atomic_inc_return(&sbi->s_open_zones) > sbi->s_max_open_zones) {
atomic_dec(&sbi->s_open_zones);
ret = -EBUSY;
@@ -978,7 +1050,6 @@ static int zonefs_open_zone(struct inode *inode)
if (i_size_read(inode) < zi->i_max_size) {
ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
if (ret) {
- zi->i_wr_refcnt--;
atomic_dec(&sbi->s_open_zones);
goto unlock;
}
@@ -986,6 +1057,8 @@ static int zonefs_open_zone(struct inode *inode)
}
}
+ zi->i_wr_refcnt++;
+
unlock:
mutex_unlock(&zi->i_truncate_mutex);