Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/fs_operation.c     |   4
-rw-r--r--  fs/afs/write.c            |   1
-rw-r--r--  fs/autofs/waitq.c         |   2
-rw-r--r--  fs/btrfs/backref.c        |   1
-rw-r--r--  fs/btrfs/ctree.c          |   2
-rw-r--r--  fs/btrfs/discard.c        |   1
-rw-r--r--  fs/btrfs/disk-io.c        |   6
-rw-r--r--  fs/btrfs/extent_io.c      |  43
-rw-r--r--  fs/btrfs/file.c           |   1
-rw-r--r--  fs/btrfs/inode.c          |  32
-rw-r--r--  fs/btrfs/ref-verify.c     |   2
-rw-r--r--  fs/btrfs/space-info.c     |   2
-rw-r--r--  fs/btrfs/super.c          |   6
-rw-r--r--  fs/btrfs/volumes.c        |   8
-rw-r--r--  fs/btrfs/volumes.h        |   2
-rw-r--r--  fs/cachefiles/rdwr.c      |   2
-rw-r--r--  fs/cifs/cifsfs.h          |   2
-rw-r--r--  fs/cifs/file.c            |  19
-rw-r--r--  fs/cifs/inode.c           |  10
-rw-r--r--  fs/cifs/ioctl.c           |   9
-rw-r--r--  fs/cifs/smb2misc.c        |   8
-rw-r--r--  fs/cifs/smb2ops.c         |   2
-rw-r--r--  fs/efivarfs/super.c       |   6
-rw-r--r--  fs/exfat/dir.c            |   2
-rw-r--r--  fs/exfat/exfat_fs.h       |   2
-rw-r--r--  fs/exfat/file.c           |   2
-rw-r--r--  fs/exfat/nls.c            |   8
-rw-r--r--  fs/fuse/file.c            | 132
-rw-r--r--  fs/fuse/inode.c           |  19
-rw-r--r--  fs/gfs2/aops.c            |  45
-rw-r--r--  fs/gfs2/file.c            |  52
-rw-r--r--  fs/io_uring.c             | 105
-rw-r--r--  fs/namespace.c            |   1
-rw-r--r--  fs/nfs/direct.c           |  13
-rw-r--r--  fs/nfs/file.c             |   1
-rw-r--r--  fs/nfs/nfs4proc.c         |  20
-rw-r--r--  fs/nfsd/nfs4state.c       |  20
-rw-r--r--  fs/overlayfs/copy_up.c    |   2
-rw-r--r--  fs/overlayfs/export.c     |   2
-rw-r--r--  fs/overlayfs/file.c       |  10
-rw-r--r--  fs/overlayfs/namei.c      |  15
-rw-r--r--  fs/overlayfs/overlayfs.h  |   1
-rw-r--r--  fs/overlayfs/super.c      |  73
-rw-r--r--  fs/read_write.c           | 131
-rw-r--r--  fs/squashfs/block.c       |   2
-rw-r--r--  fs/zonefs/super.c         |  18
46 files changed, 506 insertions(+), 341 deletions(-)
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index c264839b2fd0..24fd163c6323 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -71,7 +71,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
swap(vnode, vnode2);
if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
- op->error = -EINTR;
+ op->error = -ERESTARTSYS;
op->flags |= AFS_OPERATION_STOP;
_leave(" = f [I 0]");
return false;
@@ -80,7 +80,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
if (vnode2) {
if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) {
- op->error = -EINTR;
+ op->error = -ERESTARTSYS;
op->flags |= AFS_OPERATION_STOP;
mutex_unlock(&vnode->io_lock);
op->flags &= ~AFS_OPERATION_LOCK_0;
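The two afs hunks above switch an interrupted lock attempt from -EINTR to
-ERESTARTSYS, the usual kernel convention: -ERESTARTSYS lets the signal code
restart the syscall transparently when no user handler actually runs, whereas
-EINTR always escapes to userspace. A minimal sketch of the pattern, assuming
a plain mutex and a hypothetical do_locked_work() helper:

	static int example_locked_op(struct mutex *lock)
	{
		/* returns -EINTR if a signal arrives before the lock is taken */
		if (mutex_lock_interruptible(lock) < 0)
			return -ERESTARTSYS;	/* allow transparent restart */
		do_locked_work();		/* hypothetical */
		mutex_unlock(lock);
		return 0;
	}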
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 7437806332d9..a121c247d95a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -449,6 +449,7 @@ static int afs_store_data(struct address_space *mapping,
op->store.first_offset = offset;
op->store.last_to = to;
op->mtime = vnode->vfs_inode.i_mtime;
+ op->flags |= AFS_OPERATION_UNINTR;
op->ops = &afs_store_data_operation;
try_next_key:
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index b04c528b19d3..74c886f7c51c 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -53,7 +53,7 @@ static int autofs_write(struct autofs_sb_info *sbi,
mutex_lock(&sbi->pipe_mutex);
while (bytes) {
- wr = __kernel_write(file, data, bytes, &file->f_pos);
+ wr = kernel_write(file, data, bytes, &file->f_pos);
if (wr <= 0)
break;
data += wr;
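Here (and in cachefiles below) __kernel_write() is replaced by kernel_write(),
which wraps the bare helper with rw_verify_area() checking and is the intended
entry point for ordinary in-kernel writers. A minimal sketch, assuming the
caller needs the whole buffer written:

	static int write_whole_buffer(struct file *file, const char *buf,
				      size_t count, loff_t *pos)
	{
		while (count) {
			ssize_t wr = kernel_write(file, buf, count, pos);

			if (wr <= 0)
				return wr ? wr : -EIO;	/* stop on error or EOF */
			buf += wr;
			count -= wr;
		}
		return 0;
	}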
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index d888e71e66b6..ea10f7bc99ab 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1461,6 +1461,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
ulist_free(*roots);
+ *roots = NULL;
return ret;
}
node = ulist_next(tmp, &uiter);
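The backref fix clears the caller's pointer after freeing it, so the caller's
own error handling cannot free the ulist a second time. The idiom, sketched
generically (ulist_free() ignores NULL):

	static void destroy_roots(struct ulist **roots)
	{
		ulist_free(*roots);
		*roots = NULL;	/* a later ulist_free(*roots) is now a no-op */
	}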
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3a7648bff42c..82ab6e5a386d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1196,7 +1196,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
switch (tm->op) {
case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
BUG_ON(tm->slot < n);
- /* Fallthrough */
+ fallthrough;
case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
case MOD_LOG_KEY_REMOVE:
btrfs_set_node_key(eb, &tm->key, tm->slot);
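This hunk, like several below (ref-verify.c, super.c, volumes.h), replaces a
/* Fallthrough */ comment with the fallthrough pseudo-keyword from
<linux/compiler_attributes.h>, which expands to
__attribute__((__fallthrough__)) on compilers that support it, so
-Wimplicit-fallthrough checks the annotation instead of parsing comments.
Sketched with hypothetical ops:

	switch (op) {
	case OP_PREPARE:
		prepare();
		fallthrough;	/* deliberately continues into OP_RUN */
	case OP_RUN:
		run();
		break;
	}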
diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index 5615320fa659..741c7e19c32f 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -619,6 +619,7 @@ void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
bg_list) {
list_del_init(&block_group->bg_list);
+ btrfs_put_block_group(block_group);
btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
}
spin_unlock(&fs_info->unused_bgs_lock);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7c6f0bbb54a5..b1a148058773 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2593,10 +2593,12 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
!extent_buffer_uptodate(tree_root->node)) {
handle_error = true;
- if (IS_ERR(tree_root->node))
+ if (IS_ERR(tree_root->node)) {
ret = PTR_ERR(tree_root->node);
- else if (!extent_buffer_uptodate(tree_root->node))
+ tree_root->node = NULL;
+ } else if (!extent_buffer_uptodate(tree_root->node)) {
ret = -EUCLEAN;
+ }
btrfs_warn(fs_info, "failed to read tree root");
continue;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 68c96057ad2d..60278e52c37a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1999,7 +1999,8 @@ static int __process_pages_contig(struct address_space *mapping,
if (!PageDirty(pages[i]) ||
pages[i]->mapping != mapping) {
unlock_page(pages[i]);
- put_page(pages[i]);
+ for (; i < ret; i++)
+ put_page(pages[i]);
err = -EAGAIN;
goto out;
}
@@ -5058,25 +5059,28 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
static void check_buffer_tree_ref(struct extent_buffer *eb)
{
int refs;
- /* the ref bit is tricky. We have to make sure it is set
- * if we have the buffer dirty. Otherwise the
- * code to free a buffer can end up dropping a dirty
- * page
+ /*
+ * The TREE_REF bit is first set when the extent_buffer is added
+ * to the radix tree. It is also reset, if unset, when a new reference
+ * is created by find_extent_buffer.
*
- * Once the ref bit is set, it won't go away while the
- * buffer is dirty or in writeback, and it also won't
- * go away while we have the reference count on the
- * eb bumped.
+ * It is only cleared in two cases: freeing the last non-tree
+ * reference to the extent_buffer when its STALE bit is set or
+ * calling releasepage when the tree reference is the only reference.
*
- * We can't just set the ref bit without bumping the
- * ref on the eb because free_extent_buffer might
- * see the ref bit and try to clear it. If this happens
- * free_extent_buffer might end up dropping our original
- * ref by mistake and freeing the page before we are able
- * to add one more ref.
+ * In both cases, care is taken to ensure that the extent_buffer's
+ * pages are not under io. However, releasepage can be concurrently
+ * called with creating new references, which is prone to race
+ * conditions between the calls to check_buffer_tree_ref in those
+ * codepaths and clearing TREE_REF in try_release_extent_buffer.
*
- * So bump the ref count first, then set the bit. If someone
- * beat us to it, drop the ref we added.
+ * The actual lifetime of the extent_buffer in the radix tree is
+ * adequately protected by the refcount, but the TREE_REF bit and
+ * its corresponding reference are not. To protect against this
+ * class of races, we call check_buffer_tree_ref from the codepaths
+ * which trigger io after they set eb->io_pages. Note that once io is
+ * initiated, TREE_REF can no longer be cleared, so that is the
+ * moment at which any such race is best fixed.
*/
refs = atomic_read(&eb->refs);
if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
@@ -5527,6 +5531,11 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
atomic_set(&eb->io_pages, num_reads);
+ /*
+ * It is possible for releasepage to clear the TREE_REF bit before we
+ * set io_pages. See check_buffer_tree_ref for a more detailed comment.
+ */
+ check_buffer_tree_ref(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 2520605afc25..b0d2c976587e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3509,6 +3509,7 @@ const struct file_operations btrfs_file_operations = {
.read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
+ .splice_write = iter_file_splice_write,
.mmap = btrfs_file_mmap,
.open = btrfs_file_open,
.release = btrfs_release_file,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 18d384f4af54..6862cd7e21a9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1690,12 +1690,8 @@ out_check:
ret = fallback_to_cow(inode, locked_page, cow_start,
found_key.offset - 1,
page_started, nr_written);
- if (ret) {
- if (nocow)
- btrfs_dec_nocow_writers(fs_info,
- disk_bytenr);
+ if (ret)
goto error;
- }
cow_start = (u64)-1;
}
@@ -1711,9 +1707,6 @@ out_check:
ram_bytes, BTRFS_COMPRESS_NONE,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
- if (nocow)
- btrfs_dec_nocow_writers(fs_info,
- disk_bytenr);
ret = PTR_ERR(em);
goto error;
}
@@ -8130,20 +8123,17 @@ again:
/*
* Qgroup reserved space handler
* Page here will be either
- * 1) Already written to disk
- * In this case, its reserved space is released from data rsv map
- * and will be freed by delayed_ref handler finally.
- * So even we call qgroup_free_data(), it won't decrease reserved
- * space.
- * 2) Not written to disk
- * This means the reserved space should be freed here. However,
- * if a truncate invalidates the page (by clearing PageDirty)
- * and the page is accounted for while allocating extent
- * in btrfs_check_data_free_space() we let delayed_ref to
- * free the entire extent.
+ * 1) Already written to disk or ordered extent already submitted
+ * Then its QGROUP_RESERVED bit in io_tree is already cleaned.
+ * Qgroup will be handled by its qgroup_record then.
+ * btrfs_qgroup_free_data() call will do nothing here.
+ *
+ * 2) Not written to disk yet
+ * Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED
+ * bit of its io_tree, and free the qgroup reserved data space.
+ * Since the IO will never happen for this page.
*/
- if (PageDirty(page))
- btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
+ btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 7887317033c9..af92525dbb16 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -509,7 +509,7 @@ static int process_leaf(struct btrfs_root *root,
switch (key.type) {
case BTRFS_EXTENT_ITEM_KEY:
*num_bytes = key.offset;
- /* fall through */
+ fallthrough;
case BTRFS_METADATA_ITEM_KEY:
*bytenr = key.objectid;
ret = process_extent_item(fs_info, path, &key, i,
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 41ee88633769..c7bd3fdd7792 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -879,8 +879,8 @@ static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
return false;
}
global_rsv->reserved -= ticket->bytes;
+ remove_ticket(space_info, ticket);
ticket->bytes = 0;
- list_del_init(&ticket->list);
wake_up(&ticket->wait);
space_info->tickets_id++;
if (global_rsv->reserved < global_rsv->size)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index bc73fd670702..c3826ae883f0 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -523,7 +523,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_compress_force:
case Opt_compress_force_type:
compress_force = true;
- /* Fallthrough */
+ fallthrough;
case Opt_compress:
case Opt_compress_type:
saved_compress_type = btrfs_test_opt(info,
@@ -622,7 +622,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_set_opt(info->mount_opt, NOSSD);
btrfs_clear_and_info(info, SSD,
"not using ssd optimizations");
- /* Fallthrough */
+ fallthrough;
case Opt_nossd_spread:
btrfs_clear_and_info(info, SSD_SPREAD,
"not using spread ssd allocation scheme");
@@ -793,7 +793,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_recovery:
btrfs_warn(info,
"'recovery' is deprecated, use 'usebackuproot' instead");
- /* fall through */
+ fallthrough;
case Opt_usebackuproot:
btrfs_info(info,
"trying to use backup root at mount time");
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0d6e785bcb98..f403fb1e6d37 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7052,6 +7052,14 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
mutex_lock(&fs_info->chunk_mutex);
/*
+ * It is possible for mount and umount to race in such a way that
+ * we execute this code path, but open_fs_devices failed to clear
+ * total_rw_bytes. We certainly want it cleared before reading the
+ * device items, so clear it here.
+ */
+ fs_info->fs_devices->total_rw_bytes = 0;
+
+ /*
* Read all device items, and then all the chunk items. All
* device items are found before any chunk item (their object id
* is smaller than the lowest possible object id for a chunk
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index f067b5934c46..75af2334b2e3 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -408,7 +408,7 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio)
return BTRFS_MAP_WRITE;
default:
WARN_ON_ONCE(1);
- /* fall through */
+ fallthrough;
case REQ_OP_READ:
return BTRFS_MAP_READ;
}
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index e7726f5f1241..3080cda9e824 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -937,7 +937,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
}
data = kmap(page);
- ret = __kernel_write(file, data, len, &pos);
+ ret = kernel_write(file, data, len, &pos);
kunmap(page);
fput(file);
if (ret != len)
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index c7a311d28d3d..99b3180c613a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -156,5 +156,5 @@ extern int cifs_truncate_page(struct address_space *mapping, loff_t from);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.27"
+#define CIFS_VERSION "2.28"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9b0f8f33f832..be46fab4c96d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1149,20 +1149,20 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
/*
* Set the byte-range lock (posix style). Returns:
- * 1) 0, if we set the lock and don't need to request to the server;
- * 2) 1, if we need to request to the server;
- * 3) <0, if the error occurs while setting the lock.
+ * 1) <0, if the error occurs while setting the lock;
+ * 2) 0, if we set the lock and don't need to request to the server;
+ * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
+ * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
*/
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
- int rc = 1;
+ int rc = FILE_LOCK_DEFERRED + 1;
if ((flock->fl_flags & FL_POSIX) == 0)
return rc;
-try_again:
cifs_down_write(&cinode->lock_sem);
if (!cinode->can_cache_brlcks) {
up_write(&cinode->lock_sem);
@@ -1171,13 +1171,6 @@ try_again:
rc = posix_lock_file(file, flock, NULL);
up_write(&cinode->lock_sem);
- if (rc == FILE_LOCK_DEFERRED) {
- rc = wait_event_interruptible(flock->fl_wait,
- list_empty(&flock->fl_blocked_member));
- if (!rc)
- goto try_again;
- locks_delete_block(flock);
- }
return rc;
}
@@ -1652,7 +1645,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
int posix_lock_type;
rc = cifs_posix_lock_set(file, flock);
- if (!rc || rc < 0)
+ if (rc <= FILE_LOCK_DEFERRED)
return rc;
if (type & server->vals->shared_lock_type)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 49c3ea8aa845..ce95801e9b66 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2044,7 +2044,6 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
FILE_UNIX_BASIC_INFO *info_buf_target;
unsigned int xid;
int rc, tmprc;
- bool new_target = d_really_is_negative(target_dentry);
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
@@ -2121,13 +2120,8 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
*/
unlink_target:
- /*
- * If the target dentry was created during the rename, try
- * unlinking it if it's not negative
- */
- if (new_target &&
- d_really_is_positive(target_dentry) &&
- (rc == -EACCES || rc == -EEXIST)) {
+ /* Try unlinking the target dentry if it's not negative */
+ if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) {
if (d_is_dir(target_dentry))
tmprc = cifs_rmdir(target_dir, target_dentry);
else
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 4a73e63c4d43..dcde44ff6cf9 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -169,6 +169,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
unsigned int xid;
struct cifsFileInfo *pSMBFile = filep->private_data;
struct cifs_tcon *tcon;
+ struct tcon_link *tlink;
struct cifs_sb_info *cifs_sb;
__u64 ExtAttrBits = 0;
__u64 caps;
@@ -307,13 +308,19 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
break;
}
cifs_sb = CIFS_SB(inode->i_sb);
- tcon = tlink_tcon(cifs_sb_tlink(cifs_sb));
+ tlink = cifs_sb_tlink(cifs_sb);
+ if (IS_ERR(tlink)) {
+ rc = PTR_ERR(tlink);
+ break;
+ }
+ tcon = tlink_tcon(tlink);
if (tcon && tcon->ses->server->ops->notify) {
rc = tcon->ses->server->ops->notify(xid,
filep, (void __user *)arg);
cifs_dbg(FYI, "ioctl notify rc %d\n", rc);
} else
rc = -EOPNOTSUPP;
+ cifs_put_tlink(tlink);
break;
default:
cifs_dbg(FYI, "unsupported ioctl\n");
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 6a39451973f8..157992864ce7 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -354,9 +354,13 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr)
((struct smb2_ioctl_rsp *)shdr)->OutputCount);
break;
case SMB2_CHANGE_NOTIFY:
+ *off = le16_to_cpu(
+ ((struct smb2_change_notify_rsp *)shdr)->OutputBufferOffset);
+ *len = le32_to_cpu(
+ ((struct smb2_change_notify_rsp *)shdr)->OutputBufferLength);
+ break;
default:
- /* BB FIXME for unimplemented cases above */
- cifs_dbg(VFS, "no length check for command\n");
+ cifs_dbg(VFS, "no length check for command %d\n", le16_to_cpu(shdr->Command));
break;
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d9fdafa5eb60..32f90dc82c84 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2148,7 +2148,7 @@ smb3_notify(const unsigned int xid, struct file *pfile,
tcon = cifs_sb_master_tcon(cifs_sb);
oparms.tcon = tcon;
- oparms.desired_access = FILE_READ_ATTRIBUTES;
+ oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA;
oparms.disposition = FILE_OPEN;
oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.fid = &fid;
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 12c66f5d92dd..28bb5689333a 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -201,6 +201,9 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_d_op = &efivarfs_d_ops;
sb->s_time_gran = 1;
+ if (!efivar_supports_writes())
+ sb->s_flags |= SB_RDONLY;
+
inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0, true);
if (!inode)
return -ENOMEM;
@@ -252,9 +255,6 @@ static struct file_system_type efivarfs_type = {
static __init int efivarfs_init(void)
{
- if (!efi_rt_services_supported(EFI_RT_SUPPORTED_VARIABLE_SERVICES))
- return -ENODEV;
-
if (!efivars_kobject())
return -ENODEV;
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 91ece649285d..119abf0d8dd6 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -1112,7 +1112,7 @@ found:
ret = exfat_get_next_cluster(sb, &clu.dir);
}
- if (ret || clu.dir != EXFAT_EOF_CLUSTER) {
+ if (ret || clu.dir == EXFAT_EOF_CLUSTER) {
/* just initialized hint_stat */
hint_stat->clu = p_dir->dir;
hint_stat->eidx = 0;
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 7579cd3bbadb..75c7bdbeba6d 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -371,7 +371,7 @@ static inline bool exfat_is_last_sector_in_cluster(struct exfat_sb_info *sbi,
static inline sector_t exfat_cluster_to_sector(struct exfat_sb_info *sbi,
unsigned int clus)
{
- return ((clus - EXFAT_RESERVED_CLUSTERS) << sbi->sect_per_clus_bits) +
+ return ((sector_t)(clus - EXFAT_RESERVED_CLUSTERS) << sbi->sect_per_clus_bits) +
sbi->data_start_sector;
}
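The cast matters because the shift was previously done in 32-bit unsigned
arithmetic and only then widened to sector_t, truncating cluster-to-sector
conversions on large volumes. A hedged illustration with made-up numbers,
assuming a 64-bit sector_t:

	unsigned int clus = 0x02000000;		/* a high cluster number */
	unsigned char shift = 8;		/* 256 sectors per cluster */

	/* wraps: 0x01FFFFFE << 8 needs 33 bits, result is truncated */
	sector_t bad  = (clus - EXFAT_RESERVED_CLUSTERS) << shift;
	/* correct: widened to 64 bits before the shift */
	sector_t good = (sector_t)(clus - EXFAT_RESERVED_CLUSTERS) << shift;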
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 3b7fea465fd4..a6a063830edc 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -176,7 +176,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size)
ep2->dentry.stream.size = 0;
} else {
ep2->dentry.stream.valid_size = cpu_to_le64(new_size);
- ep2->dentry.stream.size = ep->dentry.stream.valid_size;
+ ep2->dentry.stream.size = ep2->dentry.stream.valid_size;
}
if (new_size == 0) {
diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c
index 57b5a7a4d1f7..a3c927501e67 100644
--- a/fs/exfat/nls.c
+++ b/fs/exfat/nls.c
@@ -495,7 +495,7 @@ static int exfat_utf8_to_utf16(struct super_block *sb,
struct exfat_uni_name *p_uniname, int *p_lossy)
{
int i, unilen, lossy = NLS_NAME_NO_LOSSY;
- unsigned short upname[MAX_NAME_LENGTH + 1];
+ __le16 upname[MAX_NAME_LENGTH + 1];
unsigned short *uniname = p_uniname->name;
WARN_ON(!len);
@@ -519,7 +519,7 @@ static int exfat_utf8_to_utf16(struct super_block *sb,
exfat_wstrchr(bad_uni_chars, *uniname))
lossy |= NLS_NAME_LOSSY;
- upname[i] = exfat_toupper(sb, *uniname);
+ upname[i] = cpu_to_le16(exfat_toupper(sb, *uniname));
uniname++;
}
@@ -597,7 +597,7 @@ static int exfat_nls_to_ucs2(struct super_block *sb,
struct exfat_uni_name *p_uniname, int *p_lossy)
{
int i = 0, unilen = 0, lossy = NLS_NAME_NO_LOSSY;
- unsigned short upname[MAX_NAME_LENGTH + 1];
+ __le16 upname[MAX_NAME_LENGTH + 1];
unsigned short *uniname = p_uniname->name;
struct nls_table *nls = EXFAT_SB(sb)->nls_io;
@@ -611,7 +611,7 @@ static int exfat_nls_to_ucs2(struct super_block *sb,
exfat_wstrchr(bad_uni_chars, *uniname))
lossy |= NLS_NAME_LOSSY;
- upname[unilen] = exfat_toupper(sb, *uniname);
+ upname[unilen] = cpu_to_le16(exfat_toupper(sb, *uniname));
uniname++;
unilen++;
}
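These nls hunks also change the upcased-name scratch buffer from unsigned
short to __le16: the upcased characters are stored explicitly little-endian,
so whatever is derived from the buffer (the on-disk name hash, for instance)
is stable across host endianness. The standard conversion idiom:

	__le16 disk = cpu_to_le16(0x0041);	/* 'A', little-endian on any host */
	u16 host = le16_to_cpu(disk);		/* back to native byte order */

On little-endian machines both helpers compile to nothing; on big-endian
machines they byte-swap, which is exactly the case the old code got wrong.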
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e573b0cd2737..83d917f7e542 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -18,6 +18,7 @@
#include <linux/swap.h>
#include <linux/falloc.h>
#include <linux/uio.h>
+#include <linux/fs.h>
static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
struct fuse_page_desc **desc)
@@ -1586,7 +1587,6 @@ static void fuse_writepage_finish(struct fuse_conn *fc,
struct backing_dev_info *bdi = inode_to_bdi(inode);
int i;
- rb_erase(&wpa->writepages_entry, &fi->writepages);
for (i = 0; i < ap->num_pages; i++) {
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
@@ -1637,6 +1637,7 @@ __acquires(fi->lock)
out_free:
fi->writectr--;
+ rb_erase(&wpa->writepages_entry, &fi->writepages);
fuse_writepage_finish(fc, wpa);
spin_unlock(&fi->lock);
@@ -1674,7 +1675,8 @@ __acquires(fi->lock)
}
}
-static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
+static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
+ struct fuse_writepage_args *wpa)
{
pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
@@ -1697,11 +1699,17 @@ static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
else if (idx_to < curr_index)
p = &(*p)->rb_left;
else
- return (void) WARN_ON(true);
+ return curr;
}
rb_link_node(&wpa->writepages_entry, parent, p);
rb_insert_color(&wpa->writepages_entry, root);
+ return NULL;
+}
+
+static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
+{
+ WARN_ON(fuse_insert_writeback(root, wpa));
}
static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
@@ -1714,6 +1722,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
mapping_set_error(inode->i_mapping, error);
spin_lock(&fi->lock);
+ rb_erase(&wpa->writepages_entry, &fi->writepages);
while (wpa->next) {
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_write_in *inarg = &wpa->ia.write.in;
@@ -1952,14 +1961,14 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
}
/*
- * First recheck under fi->lock if the offending offset is still under
- * writeback. If yes, then iterate auxiliary write requests, to see if there's
+ * Check under fi->lock if the page is under writeback, and insert it onto the
+ * rb_tree if not. Otherwise iterate auxiliary write requests, to see if there's
* one already added for a page at this offset. If there's none, then insert
* this new request onto the auxiliary list, otherwise reuse the existing one by
- * copying the new page contents over to the old temporary page.
+ * swapping the new temp page with the old one.
*/
-static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
- struct page *page)
+static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
+ struct page *page)
{
struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
struct fuse_writepage_args *tmp;
@@ -1967,17 +1976,15 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
WARN_ON(new_ap->num_pages != 0);
+ new_ap->num_pages = 1;
spin_lock(&fi->lock);
- rb_erase(&new_wpa->writepages_entry, &fi->writepages);
- old_wpa = fuse_find_writeback(fi, page->index, page->index);
+ old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa);
if (!old_wpa) {
- tree_insert(&fi->writepages, new_wpa);
spin_unlock(&fi->lock);
- return false;
+ return true;
}
- new_ap->num_pages = 1;
for (tmp = old_wpa->next; tmp; tmp = tmp->next) {
pgoff_t curr_index;
@@ -2006,7 +2013,41 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
fuse_writepage_free(new_wpa);
}
- return true;
+ return false;
+}
+
+static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
+ struct fuse_args_pages *ap,
+ struct fuse_fill_wb_data *data)
+{
+ WARN_ON(!ap->num_pages);
+
+ /*
+ * Being under writeback is unlikely but possible. For example direct
+ * read to an mmaped fuse file will set the page dirty twice; once when
+ * the pages are faulted with get_user_pages(), and then after the read
+ * completed.
+ */
+ if (fuse_page_is_writeback(data->inode, page->index))
+ return true;
+
+ /* Reached max pages */
+ if (ap->num_pages == fc->max_pages)
+ return true;
+
+ /* Reached max write bytes */
+ if ((ap->num_pages + 1) * PAGE_SIZE > fc->max_write)
+ return true;
+
+ /* Discontinuity */
+ if (data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)
+ return true;
+
+ /* Need to grow the pages array? If so, did the expansion fail? */
+ if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data))
+ return true;
+
+ return false;
}
static int fuse_writepages_fill(struct page *page,
@@ -2019,7 +2060,6 @@ static int fuse_writepages_fill(struct page *page,
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
struct page *tmp_page;
- bool is_writeback;
int err;
if (!data->ff) {
@@ -2029,25 +2069,9 @@ static int fuse_writepages_fill(struct page *page,
goto out_unlock;
}
- /*
- * Being under writeback is unlikely but possible. For example direct
- * read to an mmaped fuse file will set the page dirty twice; once when
- * the pages are faulted with get_user_pages(), and then after the read
- * completed.
- */
- is_writeback = fuse_page_is_writeback(inode, page->index);
-
- if (wpa && ap->num_pages &&
- (is_writeback || ap->num_pages == fc->max_pages ||
- (ap->num_pages + 1) * PAGE_SIZE > fc->max_write ||
- data->orig_pages[ap->num_pages - 1]->index + 1 != page->index)) {
+ if (wpa && fuse_writepage_need_send(fc, page, ap, data)) {
fuse_writepages_send(data);
data->wpa = NULL;
- } else if (wpa && ap->num_pages == data->max_pages) {
- if (!fuse_pages_realloc(data)) {
- fuse_writepages_send(data);
- data->wpa = NULL;
- }
}
err = -ENOMEM;
@@ -2085,12 +2109,6 @@ static int fuse_writepages_fill(struct page *page,
ap->args.end = fuse_writepage_end;
ap->num_pages = 0;
wpa->inode = inode;
-
- spin_lock(&fi->lock);
- tree_insert(&fi->writepages, wpa);
- spin_unlock(&fi->lock);
-
- data->wpa = wpa;
}
set_page_writeback(page);
@@ -2098,26 +2116,25 @@ static int fuse_writepages_fill(struct page *page,
ap->pages[ap->num_pages] = tmp_page;
ap->descs[ap->num_pages].offset = 0;
ap->descs[ap->num_pages].length = PAGE_SIZE;
+ data->orig_pages[ap->num_pages] = page;
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
err = 0;
- if (is_writeback && fuse_writepage_in_flight(wpa, page)) {
+ if (data->wpa) {
+ /*
+ * Protected by fi->lock against concurrent access by
+ * fuse_page_is_writeback().
+ */
+ spin_lock(&fi->lock);
+ ap->num_pages++;
+ spin_unlock(&fi->lock);
+ } else if (fuse_writepage_add(wpa, page)) {
+ data->wpa = wpa;
+ } else {
end_page_writeback(page);
- data->wpa = NULL;
- goto out_unlock;
}
- data->orig_pages[ap->num_pages] = page;
-
- /*
- * Protected by fi->lock against concurrent access by
- * fuse_page_is_writeback().
- */
- spin_lock(&fi->lock);
- ap->num_pages++;
- spin_unlock(&fi->lock);
-
out_unlock:
unlock_page(page);
@@ -2149,10 +2166,8 @@ static int fuse_writepages(struct address_space *mapping,
err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
if (data.wpa) {
- /* Ignore errors if we can write at least one page */
WARN_ON(!data.wpa->ia.ap.num_pages);
fuse_writepages_send(&data);
- err = 0;
}
if (data.ff)
fuse_file_put(data.ff, false, false);
@@ -2761,7 +2776,16 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
struct iovec *iov = iov_page;
iov->iov_base = (void __user *)arg;
- iov->iov_len = _IOC_SIZE(cmd);
+
+ switch (cmd) {
+ case FS_IOC_GETFLAGS:
+ case FS_IOC_SETFLAGS:
+ iov->iov_len = sizeof(int);
+ break;
+ default:
+ iov->iov_len = _IOC_SIZE(cmd);
+ break;
+ }
if (_IOC_DIR(cmd) & _IOC_WRITE) {
in_iov = iov;
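The last fuse hunk above special-cases FS_IOC_GETFLAGS and FS_IOC_SETFLAGS
because their ioctl numbers encode a long-sized payload while the data
actually transferred has always been an int, so _IOC_SIZE() over-reports the
buffer on 64-bit kernels. A hedged illustration of the mismatch:

	/* <uapi/linux/fs.h> defines FS_IOC_GETFLAGS as _IOR('f', 1, long),
	 * so _IOC_SIZE(FS_IOC_GETFLAGS) == sizeof(long) == 8 on 64-bit */
	int flags;	/* ...but userspace has always passed an int */

	if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0)
		printf("inode flags: %#x\n", flags);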
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 5b4aebf5821f..bba747520e9b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -121,10 +121,12 @@ static void fuse_evict_inode(struct inode *inode)
}
}
-static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
+static int fuse_reconfigure(struct fs_context *fc)
{
+ struct super_block *sb = fc->root->d_sb;
+
sync_filesystem(sb);
- if (*flags & SB_MANDLOCK)
+ if (fc->sb_flags & SB_MANDLOCK)
return -EINVAL;
return 0;
@@ -475,6 +477,17 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
struct fuse_fs_context *ctx = fc->fs_private;
int opt;
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ /*
+ * Ignore options coming from mount(MS_REMOUNT) for backward
+ * compatibility.
+ */
+ if (fc->oldapi)
+ return 0;
+
+ return invalfc(fc, "No changes allowed in reconfigure");
+ }
+
opt = fs_parse(fc, fuse_fs_parameters, param, &result);
if (opt < 0)
return opt;
@@ -817,7 +830,6 @@ static const struct super_operations fuse_super_operations = {
.evict_inode = fuse_evict_inode,
.write_inode = fuse_write_inode,
.drop_inode = generic_delete_inode,
- .remount_fs = fuse_remount_fs,
.put_super = fuse_put_super,
.umount_begin = fuse_umount_begin,
.statfs = fuse_statfs,
@@ -1296,6 +1308,7 @@ static int fuse_get_tree(struct fs_context *fc)
static const struct fs_context_operations fuse_context_ops = {
.free = fuse_free_fc,
.parse_param = fuse_parse_param,
+ .reconfigure = fuse_reconfigure,
.get_tree = fuse_get_tree,
};
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 72c9560f4467..68cd700a2719 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -468,21 +468,10 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
}
-/**
- * __gfs2_readpage - readpage
- * @file: The file to read a page for
- * @page: The page to read
- *
- * This is the core of gfs2's readpage. It's used by the internal file
- * reading code as in that case we already hold the glock. Also it's
- * called by gfs2_readpage() once the required lock has been granted.
- */
-
static int __gfs2_readpage(void *file, struct page *page)
{
struct gfs2_inode *ip = GFS2_I(page->mapping->host);
struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
-
int error;
if (i_blocksize(page->mapping->host) == PAGE_SIZE &&
@@ -505,36 +494,11 @@ static int __gfs2_readpage(void *file, struct page *page)
* gfs2_readpage - read a page of a file
* @file: The file to read
* @page: The page of the file
- *
- * This deals with the locking required. We have to unlock and
- * relock the page in order to get the locking in the right
- * order.
*/
static int gfs2_readpage(struct file *file, struct page *page)
{
- struct address_space *mapping = page->mapping;
- struct gfs2_inode *ip = GFS2_I(mapping->host);
- struct gfs2_holder gh;
- int error;
-
- unlock_page(page);
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
- error = gfs2_glock_nq(&gh);
- if (unlikely(error))
- goto out;
- error = AOP_TRUNCATED_PAGE;
- lock_page(page);
- if (page->mapping == mapping && !PageUptodate(page))
- error = __gfs2_readpage(file, page);
- else
- unlock_page(page);
- gfs2_glock_dq(&gh);
-out:
- gfs2_holder_uninit(&gh);
- if (error && error != AOP_TRUNCATED_PAGE)
- lock_page(page);
- return error;
+ return __gfs2_readpage(file, page);
}
/**
@@ -598,16 +562,9 @@ static void gfs2_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_holder gh;
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
- if (gfs2_glock_nq(&gh))
- goto out_uninit;
if (!gfs2_is_stuffed(ip))
mpage_readahead(rac, gfs2_block_map);
- gfs2_glock_dq(&gh);
-out_uninit:
- gfs2_holder_uninit(&gh);
}
/**
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index fe305e4bfd37..bebde537ac8c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -558,8 +558,29 @@ out_uninit:
return block_page_mkwrite_return(ret);
}
+static vm_fault_t gfs2_fault(struct vm_fault *vmf)
+{
+ struct inode *inode = file_inode(vmf->vma->vm_file);
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder gh;
+ vm_fault_t ret;
+ int err;
+
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ err = gfs2_glock_nq(&gh);
+ if (err) {
+ ret = block_page_mkwrite_return(err);
+ goto out_uninit;
+ }
+ ret = filemap_fault(vmf);
+ gfs2_glock_dq(&gh);
+out_uninit:
+ gfs2_holder_uninit(&gh);
+ return ret;
+}
+
static const struct vm_operations_struct gfs2_vm_ops = {
- .fault = filemap_fault,
+ .fault = gfs2_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = gfs2_page_mkwrite,
};
@@ -824,6 +845,9 @@ out_uninit:
static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
+ struct gfs2_inode *ip;
+ struct gfs2_holder gh;
+ size_t written = 0;
ssize_t ret;
if (iocb->ki_flags & IOCB_DIRECT) {
@@ -832,7 +856,31 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
iocb->ki_flags &= ~IOCB_DIRECT;
}
- return generic_file_read_iter(iocb, to);
+ iocb->ki_flags |= IOCB_NOIO;
+ ret = generic_file_read_iter(iocb, to);
+ iocb->ki_flags &= ~IOCB_NOIO;
+ if (ret >= 0) {
+ if (!iov_iter_count(to))
+ return ret;
+ written = ret;
+ } else {
+ if (ret != -EAGAIN)
+ return ret;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return ret;
+ }
+ ip = GFS2_I(iocb->ki_filp->f_mapping->host);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ ret = gfs2_glock_nq(&gh);
+ if (ret)
+ goto out_uninit;
+ ret = generic_file_read_iter(iocb, to);
+ if (ret > 0)
+ written += ret;
+ gfs2_glock_dq(&gh);
+out_uninit:
+ gfs2_holder_uninit(&gh);
+ return written ? written : ret;
}
/**
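The new gfs2 read path is a two-step pattern: attempt a lockless,
page-cache-only read first (IOCB_NOIO makes generic_file_read_iter() stop
rather than start real I/O), and take the glock only for data that was not in
cache. Distilled, with fs_lock()/fs_unlock() standing in for the real glock
calls and the partial-read bookkeeping omitted:

	static ssize_t read_cached_then_locked(struct kiocb *iocb,
					       struct iov_iter *to)
	{
		ssize_t ret;

		iocb->ki_flags |= IOCB_NOIO;	/* serve from page cache only */
		ret = generic_file_read_iter(iocb, to);
		iocb->ki_flags &= ~IOCB_NOIO;
		if (ret >= 0 && !iov_iter_count(to))
			return ret;		/* fully served from cache */
		if (ret < 0 && ret != -EAGAIN)
			return ret;		/* hard error, give up */

		fs_lock();			/* hypothetical */
		ret = generic_file_read_iter(iocb, to);
		fs_unlock();
		return ret;
	}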
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d37d7ea5ebe5..32b0064f806e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -605,6 +605,7 @@ enum {
struct async_poll {
struct io_poll_iocb poll;
+ struct io_poll_iocb *double_poll;
struct io_wq_work work;
};
@@ -1096,6 +1097,8 @@ static inline void io_prep_async_work(struct io_kiocb *req,
{
const struct io_op_def *def = &io_op_defs[req->opcode];
+ io_req_init_async(req);
+
if (req->flags & REQ_F_ISREG) {
if (def->hash_reg_file)
io_wq_hash_work(&req->work, file_inode(req->file));
@@ -1104,7 +1107,6 @@ static inline void io_prep_async_work(struct io_kiocb *req,
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
- io_req_init_async(req);
io_req_work_grab_env(req, def);
*link = io_prep_linked_timeout(req);
@@ -1274,6 +1276,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
if (cqe) {
clear_bit(0, &ctx->sq_check_overflow);
clear_bit(0, &ctx->cq_check_overflow);
+ ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
}
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
@@ -1311,6 +1314,7 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
if (list_empty(&ctx->cq_overflow_list)) {
set_bit(0, &ctx->sq_check_overflow);
set_bit(0, &ctx->cq_check_overflow);
+ ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
}
req->flags |= REQ_F_OVERFLOW;
refcount_inc(&req->refs);
@@ -3551,6 +3555,7 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
+ io->msg.msg.msg_name = &io->msg.addr;
io->msg.iov = io->msg.fast_iov;
ret = sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
&io->msg.iov);
@@ -3732,6 +3737,7 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
static int io_recvmsg_copy_hdr(struct io_kiocb *req, struct io_async_ctx *io)
{
+ io->msg.msg.msg_name = &io->msg.addr;
io->msg.iov = io->msg.fast_iov;
#ifdef CONFIG_COMPAT
@@ -3840,10 +3846,16 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg,
kmsg->uaddr, flags);
- if (force_nonblock && ret == -EAGAIN)
- return io_setup_async_msg(req, kmsg);
+ if (force_nonblock && ret == -EAGAIN) {
+ ret = io_setup_async_msg(req, kmsg);
+ if (ret != -EAGAIN)
+ kfree(kbuf);
+ return ret;
+ }
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ if (kbuf)
+ kfree(kbuf);
}
if (kmsg && kmsg->iov != kmsg->fast_iov)
@@ -4148,9 +4160,9 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
return false;
}
-static void io_poll_remove_double(struct io_kiocb *req)
+static void io_poll_remove_double(struct io_kiocb *req, void *data)
{
- struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+ struct io_poll_iocb *poll = data;
lockdep_assert_held(&req->ctx->completion_lock);
@@ -4170,7 +4182,7 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
{
struct io_ring_ctx *ctx = req->ctx;
- io_poll_remove_double(req);
+ io_poll_remove_double(req, req->io);
req->poll.done = true;
io_cqring_fill_event(req, error ? error : mangle_poll(mask));
io_commit_cqring(ctx);
@@ -4213,21 +4225,21 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
int sync, void *key)
{
struct io_kiocb *req = wait->private;
- struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
+ struct io_poll_iocb *poll = req->apoll->double_poll;
__poll_t mask = key_to_poll(key);
/* for instances that support it check for an event match first: */
if (mask && !(mask & poll->events))
return 0;
- if (req->poll.head) {
+ if (poll && poll->head) {
bool done;
- spin_lock(&req->poll.head->lock);
- done = list_empty(&req->poll.wait.entry);
+ spin_lock(&poll->head->lock);
+ done = list_empty(&poll->wait.entry);
if (!done)
- list_del_init(&req->poll.wait.entry);
- spin_unlock(&req->poll.head->lock);
+ list_del_init(&poll->wait.entry);
+ spin_unlock(&poll->head->lock);
if (!done)
__io_async_wake(req, poll, mask, io_poll_task_func);
}
@@ -4247,7 +4259,8 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
}
static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
- struct wait_queue_head *head)
+ struct wait_queue_head *head,
+ struct io_poll_iocb **poll_ptr)
{
struct io_kiocb *req = pt->req;
@@ -4258,7 +4271,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
*/
if (unlikely(poll->head)) {
/* already have a 2nd entry, fail a third attempt */
- if (req->io) {
+ if (*poll_ptr) {
pt->error = -EINVAL;
return;
}
@@ -4270,7 +4283,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
io_init_poll_iocb(poll, req->poll.events, io_poll_double_wake);
refcount_inc(&req->refs);
poll->wait.private = req;
- req->io = (void *) poll;
+ *poll_ptr = poll;
}
pt->error = 0;
@@ -4282,8 +4295,9 @@ static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
struct poll_table_struct *p)
{
struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+ struct async_poll *apoll = pt->req->apoll;
- __io_queue_proc(&pt->req->apoll->poll, pt, head);
+ __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}
static void io_sq_thread_drop_mm(struct io_ring_ctx *ctx)
@@ -4333,11 +4347,13 @@ static void io_async_task_func(struct callback_head *cb)
}
}
+ io_poll_remove_double(req, apoll->double_poll);
spin_unlock_irq(&ctx->completion_lock);
/* restore ->work in case we need to retry again */
if (req->flags & REQ_F_WORK_INITIALIZED)
memcpy(&req->work, &apoll->work, sizeof(req->work));
+ kfree(apoll->double_poll);
kfree(apoll);
if (!canceled) {
@@ -4425,7 +4441,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
struct async_poll *apoll;
struct io_poll_table ipt;
__poll_t mask, ret;
- bool had_io;
if (!req->file || !file_can_poll(req->file))
return false;
@@ -4437,11 +4452,11 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
if (unlikely(!apoll))
return false;
+ apoll->double_poll = NULL;
req->flags |= REQ_F_POLLED;
if (req->flags & REQ_F_WORK_INITIALIZED)
memcpy(&apoll->work, &req->work, sizeof(req->work));
- had_io = req->io != NULL;
io_get_req_task(req);
req->apoll = apoll;
@@ -4459,13 +4474,11 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
io_async_wake);
if (ret) {
- ipt.error = 0;
- /* only remove double add if we did it here */
- if (!had_io)
- io_poll_remove_double(req);
+ io_poll_remove_double(req, apoll->double_poll);
spin_unlock_irq(&ctx->completion_lock);
if (req->flags & REQ_F_WORK_INITIALIZED)
memcpy(&req->work, &apoll->work, sizeof(req->work));
+ kfree(apoll->double_poll);
kfree(apoll);
return false;
}
@@ -4496,11 +4509,13 @@ static bool io_poll_remove_one(struct io_kiocb *req)
bool do_complete;
if (req->opcode == IORING_OP_POLL_ADD) {
- io_poll_remove_double(req);
+ io_poll_remove_double(req, req->io);
do_complete = __io_poll_remove_one(req, &req->poll);
} else {
struct async_poll *apoll = req->apoll;
+ io_poll_remove_double(req, apoll->double_poll);
+
/* non-poll requests have submit ref still */
do_complete = __io_poll_remove_one(req, &apoll->poll);
if (do_complete) {
@@ -4513,6 +4528,7 @@ static bool io_poll_remove_one(struct io_kiocb *req)
if (req->flags & REQ_F_WORK_INITIALIZED)
memcpy(&req->work, &apoll->work,
sizeof(req->work));
+ kfree(apoll->double_poll);
kfree(apoll);
}
}
@@ -4613,7 +4629,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
{
struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
- __io_queue_proc(&pt->req->poll, pt, head);
+ __io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->io);
}
static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -4721,7 +4737,9 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
{
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len)
+ if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->buf_index || sqe->len)
return -EINVAL;
req->timeout.addr = READ_ONCE(sqe->addr);
@@ -4899,8 +4917,9 @@ static int io_async_cancel_prep(struct io_kiocb *req,
{
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->flags || sqe->ioprio || sqe->off || sqe->len ||
- sqe->cancel_flags)
+ if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags)
return -EINVAL;
req->cancel.addr = READ_ONCE(sqe->addr);
@@ -4918,7 +4937,9 @@ static int io_async_cancel(struct io_kiocb *req)
static int io_files_update_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
- if (sqe->flags || sqe->ioprio || sqe->rw_flags)
+ if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->rw_flags)
return -EINVAL;
req->files_update.offset = READ_ONCE(sqe->off);
@@ -5709,6 +5730,7 @@ fail_req:
* Never try inline submit of IOSQE_ASYNC is set, go straight
* to async execution.
*/
+ io_req_init_async(req);
req->work.flags |= IO_WQ_WORK_CONCURRENT;
io_queue_async_work(req);
} else {
@@ -6080,9 +6102,9 @@ static int io_sq_thread(void *data)
}
/* Tell userspace we may need a wakeup call */
+ spin_lock_irq(&ctx->completion_lock);
ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
- /* make sure to read SQ tail after writing flags */
- smp_mb();
+ spin_unlock_irq(&ctx->completion_lock);
to_submit = io_sqring_entries(ctx);
if (!to_submit || ret == -EBUSY) {
@@ -6100,13 +6122,17 @@ static int io_sq_thread(void *data)
schedule();
finish_wait(&ctx->sqo_wait, &wait);
+ spin_lock_irq(&ctx->completion_lock);
ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
+ spin_unlock_irq(&ctx->completion_lock);
ret = 0;
continue;
}
finish_wait(&ctx->sqo_wait, &wait);
+ spin_lock_irq(&ctx->completion_lock);
ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
+ spin_unlock_irq(&ctx->completion_lock);
}
mutex_lock(&ctx->uring_lock);
@@ -6693,6 +6719,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
for (i = 0; i < nr_tables; i++)
kfree(ctx->file_data->table[i].files);
+ percpu_ref_exit(&ctx->file_data->refs);
kfree(ctx->file_data->table);
kfree(ctx->file_data);
ctx->file_data = NULL;
@@ -6845,8 +6872,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
}
table->files[index] = file;
err = io_sqe_file_register(ctx, file, i);
- if (err)
+ if (err) {
+ fput(file);
break;
+ }
}
nr_args--;
done++;
@@ -7342,9 +7371,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_mem_free(ctx->sq_sqes);
percpu_ref_exit(&ctx->refs);
- if (ctx->account_mem)
- io_unaccount_mem(ctx->user,
- ring_pages(ctx->sq_entries, ctx->cq_entries));
free_uid(ctx->user);
put_cred(ctx->creds);
kfree(ctx->cancel_hash);
@@ -7429,6 +7455,16 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
if (ctx->rings)
io_cqring_overflow_flush(ctx, true);
idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
+
+ /*
+ * Do this upfront, so we won't have a grace period where the ring
+ * is closed but resources aren't reaped yet. This can cause
+ * spurious failure in setting up a new ring.
+ */
+ if (ctx->account_mem)
+ io_unaccount_mem(ctx->user,
+ ring_pages(ctx->sq_entries, ctx->cq_entries));
+
INIT_WORK(&ctx->exit_work, io_ring_exit_work);
queue_work(system_wq, &ctx->exit_work);
}
@@ -7488,6 +7524,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
if (list_empty(&ctx->cq_overflow_list)) {
clear_bit(0, &ctx->sq_check_overflow);
clear_bit(0, &ctx->cq_check_overflow);
+ ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
}
spin_unlock_irq(&ctx->completion_lock);
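Among the io_uring fixes, io_sqe_files_register() now calls percpu_ref_exit()
on its failure path: every successful percpu_ref_init() must be paired with
percpu_ref_exit(), or the per-CPU counter allocation leaks. The pairing,
sketched with a hypothetical registration function:

	static int register_table(struct my_data *data, unsigned int nr)
	{
		int ret;

		ret = percpu_ref_init(&data->refs, my_release_fn, 0, GFP_KERNEL);
		if (ret)
			return ret;

		data->table = kcalloc(nr, sizeof(*data->table), GFP_KERNEL);
		if (!data->table) {
			percpu_ref_exit(&data->refs);	/* pair with init */
			return -ENOMEM;
		}
		return 0;
	}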
diff --git a/fs/namespace.c b/fs/namespace.c
index f30ed401cc6d..4a0f600a3328 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2603,6 +2603,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
if (IS_ERR(fc))
return PTR_ERR(fc);
+ fc->oldapi = true;
err = parse_monolithic_mount_data(fc, data);
if (!err) {
down_write(&sb->s_umount);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 3d113cf8908a..1b79dd5cf661 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -267,6 +267,8 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
{
struct inode *inode = dreq->inode;
+ inode_dio_end(inode);
+
if (dreq->iocb) {
long res = (long) dreq->error;
if (dreq->count != 0) {
@@ -278,10 +280,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
complete(&dreq->completion);
- igrab(inode);
nfs_direct_req_release(dreq);
- inode_dio_end(inode);
- iput(inode);
}
static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
@@ -411,10 +410,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
- igrab(inode);
- nfs_direct_req_release(dreq);
inode_dio_end(inode);
- iput(inode);
+ nfs_direct_req_release(dreq);
return result < 0 ? result : -EIO;
}
@@ -867,10 +864,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
- igrab(inode);
- nfs_direct_req_release(dreq);
inode_dio_end(inode);
- iput(inode);
+ nfs_direct_req_release(dreq);
return result < 0 ? result : -EIO;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ccd6c1637b27..f96367a2463e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -83,7 +83,6 @@ nfs_file_release(struct inode *inode, struct file *filp)
dprintk("NFS: release(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
- inode_dio_wait(inode);
nfs_file_clear_open_context(filp);
return 0;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e32717fd1169..2e2dac29a9e9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -774,6 +774,14 @@ static void nfs4_slot_sequence_acked(struct nfs4_slot *slot,
slot->seq_nr_last_acked = seqnr;
}
+static void nfs4_probe_sequence(struct nfs_client *client, const struct cred *cred,
+ struct nfs4_slot *slot)
+{
+ struct rpc_task *task = _nfs41_proc_sequence(client, cred, slot, true);
+ if (!IS_ERR(task))
+ rpc_put_task_async(task);
+}
+
static int nfs41_sequence_process(struct rpc_task *task,
struct nfs4_sequence_res *res)
{
@@ -790,6 +798,7 @@ static int nfs41_sequence_process(struct rpc_task *task,
goto out;
session = slot->table->session;
+ clp = session->clp;
trace_nfs4_sequence_done(session, res);
@@ -804,7 +813,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
nfs4_slot_sequence_acked(slot, slot->seq_nr);
/* Update the slot's sequence and clientid lease timer */
slot->seq_done = 1;
- clp = session->clp;
do_renew_lease(clp, res->sr_timestamp);
/* Check sequence flags */
nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags,
@@ -852,10 +860,18 @@ static int nfs41_sequence_process(struct rpc_task *task,
/*
* Were one or more calls using this slot interrupted?
* If the server never received the request, then our
- * transmitted slot sequence number may be too high.
+ * transmitted slot sequence number may be too high. However,
+ * if the server did receive the request then it might
+ * accidentally give us a reply with a mismatched operation.
+ * We can sort this out by sending a lone sequence operation
+ * to the server on the same slot.
*/
if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) {
slot->seq_nr--;
+ if (task->tk_msg.rpc_proc != &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE]) {
+ nfs4_probe_sequence(clp, task->tk_msg.rpc_cred, slot);
+ res->sr_slot = NULL;
+ }
goto retry_nowait;
}
/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index cce2510b2cca..c9056316a0b3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -507,6 +507,17 @@ find_any_file(struct nfs4_file *f)
return ret;
}
+static struct nfsd_file *find_deleg_file(struct nfs4_file *f)
+{
+ struct nfsd_file *ret = NULL;
+
+ spin_lock(&f->fi_lock);
+ if (f->fi_deleg_file)
+ ret = nfsd_file_get(f->fi_deleg_file);
+ spin_unlock(&f->fi_lock);
+ return ret;
+}
+
static atomic_long_t num_delegations;
unsigned long max_delegations;
@@ -2444,6 +2455,8 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
oo = ols->st_stateowner;
nf = st->sc_file;
file = find_any_file(nf);
+ if (!file)
+ return 0;
seq_printf(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
@@ -2481,6 +2494,8 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
oo = ols->st_stateowner;
nf = st->sc_file;
file = find_any_file(nf);
+ if (!file)
+ return 0;
seq_printf(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
@@ -2513,7 +2528,9 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
ds = delegstateid(st);
nf = st->sc_file;
- file = nf->fi_deleg_file;
+ file = find_deleg_file(nf);
+ if (!file)
+ return 0;
seq_printf(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
@@ -2529,6 +2546,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
seq_printf(s, ", ");
nfs4_show_fname(s, file);
seq_printf(s, " }\n");
+ nfsd_file_put(file);
return 0;
}
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 79dd052c7dbf..5e0cde85bd6b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -895,7 +895,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
return err;
}
-int ovl_copy_up_flags(struct dentry *dentry, int flags)
+static int ovl_copy_up_flags(struct dentry *dentry, int flags)
{
int err = 0;
const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 8f4286450f92..0e696f72cf65 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -476,7 +476,7 @@ static struct dentry *ovl_lookup_real_inode(struct super_block *sb,
if (IS_ERR_OR_NULL(this))
return this;
- if (WARN_ON(ovl_dentry_real_at(this, layer->idx) != real)) {
+ if (ovl_dentry_real_at(this, layer->idx) != real) {
dput(this);
this = ERR_PTR(-EIO);
}
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 01820e654a21..0d940e29d62b 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -33,13 +33,16 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode)
return 'm';
}
+/* No atime modification nor notify on underlying */
+#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
+
static struct file *ovl_open_realfile(const struct file *file,
struct inode *realinode)
{
struct inode *inode = file_inode(file);
struct file *realfile;
const struct cred *old_cred;
- int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY;
+ int flags = file->f_flags | OVL_OPEN_FLAGS;
int acc_mode = ACC_MODE(flags);
int err;
@@ -72,8 +75,7 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
struct inode *inode = file_inode(file);
int err;
- /* No atime modificaton on underlying */
- flags |= O_NOATIME | FMODE_NONOTIFY;
+ flags |= OVL_OPEN_FLAGS;
/* If some flag changed that cannot be changed then something's amiss */
if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
@@ -126,7 +128,7 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
}
/* Did the flags change since open? */
- if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
+ if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
return ovl_change_flags(real->file, file->f_flags);
return 0;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 3566282a9199..f7d4358db637 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -389,7 +389,7 @@ invalid:
}
static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
- struct ovl_path **stackp, unsigned int *ctrp)
+ struct ovl_path **stackp)
{
struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
int err;
@@ -406,10 +406,6 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
return err;
}
- if (WARN_ON(*ctrp))
- return -EIO;
-
- *ctrp = 1;
return 0;
}
@@ -861,8 +857,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
if (upperdentry && !d.is_dir) {
- unsigned int origin_ctr = 0;
-
/*
* Lookup copy up origin by decoding origin file handle.
* We may get a disconnected dentry, which is fine,
@@ -873,8 +867,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* number - it's the same as if we held a reference
* to a dentry in lower layer that was moved under us.
*/
- err = ovl_check_origin(ofs, upperdentry, &origin_path,
- &origin_ctr);
+ err = ovl_check_origin(ofs, upperdentry, &origin_path);
if (err)
goto out_put_upper;
@@ -1073,6 +1066,10 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
upperredirect = NULL;
goto out_free_oe;
}
+ err = ovl_check_metacopy_xattr(upperdentry);
+ if (err < 0)
+ goto out_free_oe;
+ uppermetacopy = err;
}
if (upperdentry || ctr) {
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index b725c7f15ff4..29bc1ec699e7 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -483,7 +483,6 @@ void ovl_aio_request_cache_destroy(void);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_with_data(struct dentry *dentry);
-int ovl_copy_up_flags(struct dentry *dentry, int flags);
int ovl_maybe_copy_up(struct dentry *dentry, int flags);
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 91476bc422f9..4b38141c2985 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -580,12 +580,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
}
}
- /* Workdir is useless in non-upper mount */
- if (!config->upperdir && config->workdir) {
- pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
- config->workdir);
- kfree(config->workdir);
- config->workdir = NULL;
+ /* Workdir/index are useless in non-upper mount */
+ if (!config->upperdir) {
+ if (config->workdir) {
+ pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+ config->workdir);
+ kfree(config->workdir);
+ config->workdir = NULL;
+ }
+ if (config->index && index_opt) {
+ pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
+ index_opt = false;
+ }
+ config->index = false;
}
err = ovl_parse_redirect_mode(config, config->redirect_mode);
@@ -622,11 +629,13 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
/* Resolve nfs_export -> index dependency */
if (config->nfs_export && !config->index) {
- if (nfs_export_opt && index_opt) {
+ if (!config->upperdir && config->redirect_follow) {
+ pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
+ config->nfs_export = false;
+ } else if (nfs_export_opt && index_opt) {
pr_err("conflicting options: nfs_export=on,index=off\n");
return -EINVAL;
- }
- if (index_opt) {
+ } else if (index_opt) {
/*
* There was an explicit index=off that resulted
* in this conflict.
@@ -1352,8 +1361,15 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
goto out;
}
+ /* index dir will act also as workdir */
+ iput(ofs->workdir_trap);
+ ofs->workdir_trap = NULL;
+ dput(ofs->workdir);
+ ofs->workdir = NULL;
ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
if (ofs->indexdir) {
+ ofs->workdir = dget(ofs->indexdir);
+
err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
"indexdir");
if (err)
@@ -1396,6 +1412,18 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
return true;
+ /*
+ * We allow using single lower with null uuid for index and nfs_export
+ * for example to support those features with single lower squashfs.
+ * To avoid regressions in setups of overlay with re-formatted lower
+ * squashfs, do not allow decoding origin with lower null uuid unless
+ * user opted-in to one of the new features that require following the
+ * lower inode of non-dir upper.
+ */
+ if (!ofs->config.index && !ofs->config.metacopy && !ofs->config.xino &&
+ uuid_is_null(uuid))
+ return false;
+
for (i = 0; i < ofs->numfs; i++) {
/*
* We use uuid to associate an overlay lower file handle with a
@@ -1493,14 +1521,23 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
if (err < 0)
goto out;
+ /*
+ * Check if lower root conflicts with this overlay's layers before
+ * checking if it is in-use as upperdir/workdir of "another"
+ * mount, because we do not bother to check in ovl_is_inuse() if
+ * the upperdir/workdir is in fact in-use by our
+ * upperdir/workdir.
+ */
err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
if (err)
goto out;
if (ovl_is_inuse(stack[i].dentry)) {
err = ovl_report_in_use(ofs, "lowerdir");
- if (err)
+ if (err) {
+ iput(trap);
goto out;
+ }
}
mnt = clone_private_mount(&stack[i]);
@@ -1575,10 +1612,6 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
if (!ofs->config.upperdir && numlower == 1) {
pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
return ERR_PTR(-EINVAL);
- } else if (!ofs->config.upperdir && ofs->config.nfs_export &&
- ofs->config.redirect_follow) {
- pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
- ofs->config.nfs_export = false;
}
stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL);
@@ -1842,21 +1875,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (!ovl_upper_mnt(ofs))
sb->s_flags |= SB_RDONLY;
- if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
- /* index dir will act also as workdir */
- dput(ofs->workdir);
- ofs->workdir = NULL;
- iput(ofs->workdir_trap);
- ofs->workdir_trap = NULL;
-
+ if (!ovl_force_readonly(ofs) && ofs->config.index) {
err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
if (err)
goto out_free_oe;
/* Force r/o mount with no index dir */
- if (ofs->indexdir)
- ofs->workdir = dget(ofs->indexdir);
- else
+ if (!ofs->indexdir)
sb->s_flags |= SB_RDONLY;
}
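
Taken together, the super.c hunks move all nfs_export/index dependency resolution into ovl_parse_opt(), so later setup code can trust config as parsed. A condensed sketch of the resulting ladder; the final automatic-enable branch sits outside the hunk above and is assumed from context:

	/* Resolve nfs_export -> index dependency (condensed sketch). */
	if (config->nfs_export && !config->index) {
		if (!config->upperdir && config->redirect_follow)
			config->nfs_export = false;	/* cannot honour both */
		else if (nfs_export_opt && index_opt)
			return -EINVAL;			/* explicit conflict */
		else if (index_opt)
			config->nfs_export = false;	/* index=off was explicit */
		else
			config->index = true;	/* assumed: implied by nfs_export=on */
	}
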
diff --git a/fs/read_write.c b/fs/read_write.c
index bbfa9b12b15e..4fb797822567 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -419,28 +419,42 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo
return ret;
}
-ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
- loff_t *pos)
+ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{
+ mm_segment_t old_fs = get_fs();
+ ssize_t ret;
+
+ if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
+ return -EINVAL;
+ if (!(file->f_mode & FMODE_CAN_READ))
+ return -EINVAL;
+
+ if (count > MAX_RW_COUNT)
+ count = MAX_RW_COUNT;
+ set_fs(KERNEL_DS);
if (file->f_op->read)
- return file->f_op->read(file, buf, count, pos);
+ ret = file->f_op->read(file, (void __user *)buf, count, pos);
else if (file->f_op->read_iter)
- return new_sync_read(file, buf, count, pos);
+ ret = new_sync_read(file, (void __user *)buf, count, pos);
else
- return -EINVAL;
+ ret = -EINVAL;
+ set_fs(old_fs);
+ if (ret > 0) {
+ fsnotify_access(file);
+ add_rchar(current, ret);
+ }
+ inc_syscr(current);
+ return ret;
}
ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{
- mm_segment_t old_fs;
- ssize_t result;
+ ssize_t ret;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- /* The cast to a user pointer is valid due to the set_fs() */
- result = vfs_read(file, (void __user *)buf, count, pos);
- set_fs(old_fs);
- return result;
+ ret = rw_verify_area(READ, file, pos, count);
+ if (ret)
+ return ret;
+ return __kernel_read(file, buf, count, pos);
}
EXPORT_SYMBOL(kernel_read);
@@ -456,17 +470,22 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
return -EFAULT;
ret = rw_verify_area(READ, file, pos, count);
- if (!ret) {
- if (count > MAX_RW_COUNT)
- count = MAX_RW_COUNT;
- ret = __vfs_read(file, buf, count, pos);
- if (ret > 0) {
- fsnotify_access(file);
- add_rchar(current, ret);
- }
- inc_syscr(current);
- }
+ if (ret)
+ return ret;
+ if (count > MAX_RW_COUNT)
+ count = MAX_RW_COUNT;
+ if (file->f_op->read)
+ ret = file->f_op->read(file, buf, count, pos);
+ else if (file->f_op->read_iter)
+ ret = new_sync_read(file, buf, count, pos);
+ else
+ ret = -EINVAL;
+ if (ret > 0) {
+ fsnotify_access(file);
+ add_rchar(current, ret);
+ }
+ inc_syscr(current);
return ret;
}
@@ -488,23 +507,15 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
return ret;
}
-static ssize_t __vfs_write(struct file *file, const char __user *p,
- size_t count, loff_t *pos)
-{
- if (file->f_op->write)
- return file->f_op->write(file, p, count, pos);
- else if (file->f_op->write_iter)
- return new_sync_write(file, p, count, pos);
- else
- return -EINVAL;
-}
-
+/* caller is responsible for file_start_write/file_end_write */
ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
{
mm_segment_t old_fs;
const char __user *p;
ssize_t ret;
+ if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
+ return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
@@ -513,7 +524,12 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
p = (__force const char __user *)buf;
if (count > MAX_RW_COUNT)
count = MAX_RW_COUNT;
- ret = __vfs_write(file, p, count, pos);
+ if (file->f_op->write)
+ ret = file->f_op->write(file, p, count, pos);
+ else if (file->f_op->write_iter)
+ ret = new_sync_write(file, p, count, pos);
+ else
+ ret = -EINVAL;
set_fs(old_fs);
if (ret > 0) {
fsnotify_modify(file);
@@ -522,21 +538,20 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
inc_syscw(current);
return ret;
}
-EXPORT_SYMBOL(__kernel_write);
ssize_t kernel_write(struct file *file, const void *buf, size_t count,
loff_t *pos)
{
- mm_segment_t old_fs;
- ssize_t res;
+ ssize_t ret;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- /* The cast to a user pointer is valid due to the set_fs() */
- res = vfs_write(file, (__force const char __user *)buf, count, pos);
- set_fs(old_fs);
+ ret = rw_verify_area(WRITE, file, pos, count);
+ if (ret)
+ return ret;
- return res;
+ file_start_write(file);
+ ret = __kernel_write(file, buf, count, pos);
+ file_end_write(file);
+ return ret;
}
EXPORT_SYMBOL(kernel_write);
@@ -552,19 +567,23 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
return -EFAULT;
ret = rw_verify_area(WRITE, file, pos, count);
- if (!ret) {
- if (count > MAX_RW_COUNT)
- count = MAX_RW_COUNT;
- file_start_write(file);
- ret = __vfs_write(file, buf, count, pos);
- if (ret > 0) {
- fsnotify_modify(file);
- add_wchar(current, ret);
- }
- inc_syscw(current);
- file_end_write(file);
+ if (ret)
+ return ret;
+ if (count > MAX_RW_COUNT)
+ count = MAX_RW_COUNT;
+ file_start_write(file);
+ if (file->f_op->write)
+ ret = file->f_op->write(file, buf, count, pos);
+ else if (file->f_op->write_iter)
+ ret = new_sync_write(file, buf, count, pos);
+ else
+ ret = -EINVAL;
+ if (ret > 0) {
+ fsnotify_modify(file);
+ add_wchar(current, ret);
}
-
+ inc_syscw(current);
+ file_end_write(file);
return ret;
}
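
The end state in read_write.c is a clean split: kernel_read()/kernel_write() perform rw_verify_area() (and, for writes, file_start_write()/file_end_write()) before delegating to the bare __kernel_read()/__kernel_write() helpers, which only flip the address limit and call the file_operations. A hedged usage sketch for in-kernel callers; the surrounding function is illustrative only:

	/* Illustrative caller: read a fixed-size header from an open file. */
	static int read_header(struct file *file, void *hdr, size_t len)
	{
		loff_t pos = 0;
		ssize_t ret;

		ret = kernel_read(file, hdr, len, &pos);  /* checked path */
		if (ret < 0)
			return ret;
		return ret == len ? 0 : -EIO;	/* reject short reads */
	}
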
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 64f61330564a..76bb1c846845 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -175,7 +175,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
/* Extract the length of the metadata block */
data = page_address(bvec->bv_page) + bvec->bv_offset;
length = data[offset];
- if (offset <= bvec->bv_len - 1) {
+ if (offset < bvec->bv_len - 1) {
length |= data[offset + 1] << 8;
} else {
if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) {
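
The squashfs one-liner is an off-by-one: the metadata length occupies two bytes, data[offset] and data[offset + 1], so both lie inside the segment only when offset + 1 <= bv_len - 1, i.e. offset < bv_len - 1. A worked instance:

	/*
	 * bv_len == 4, offset == 3:
	 *   old:  offset <= bv_len - 1  ->  3 <= 3  -> reads data[4], one
	 *         byte past the end of the segment
	 *   new:  offset <  bv_len - 1  ->  3 <  3  -> falls through to the
	 *         bio_next_segment() path and fetches the byte correctly
	 */
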
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 07bc42d62673..abfb17f88f9a 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -607,14 +607,14 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
int nr_pages;
ssize_t ret;
- nr_pages = iov_iter_npages(from, BIO_MAX_PAGES);
- if (!nr_pages)
- return 0;
-
max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
iov_iter_truncate(from, max);
+ nr_pages = iov_iter_npages(from, BIO_MAX_PAGES);
+ if (!nr_pages)
+ return 0;
+
bio = bio_alloc_bioset(GFP_NOFS, nr_pages, &fs_bio_set);
if (!bio)
return -ENOMEM;
@@ -1119,7 +1119,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
char *file_name;
struct dentry *dir;
unsigned int n = 0;
- int ret = -ENOMEM;
+ int ret;
/* If the group is empty, there is nothing to do */
if (!zd->nr_zones[type])
@@ -1135,8 +1135,10 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
zgroup_name = "seq";
dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type);
- if (!dir)
+ if (!dir) {
+ ret = -ENOMEM;
goto free;
+ }
/*
* The first zone contains the super block: skip it.
@@ -1174,8 +1176,10 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
* Use the file number within its group as file name.
*/
snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n);
- if (!zonefs_create_inode(dir, file_name, zone, type))
+ if (!zonefs_create_inode(dir, file_name, zone, type)) {
+ ret = -ENOMEM;
goto free;
+ }
n++;
}
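
The zonefs reorder matters because iov_iter_truncate() can shrink the iterator, possibly to zero: counting pages first would size the bio for data that truncation then discards, and would miss the now-empty-iterator case. The corrected order, restated:

	iov_iter_truncate(from, max);			 /* clamp first */
	nr_pages = iov_iter_npages(from, BIO_MAX_PAGES); /* then count */
	if (!nr_pages)
		return 0;	/* everything was truncated away */
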