aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c12
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/file.c148
-rw-r--r--fs/cifs/smb2maperror.c3
-rw-r--r--fs/cifs/smb2pdu.c11
-rw-r--r--fs/cifs/trace.h6
-rw-r--r--fs/io_uring.c439
-rw-r--r--fs/iomap.c12
-rw-r--r--fs/notify/fanotify/fanotify_user.c12
-rw-r--r--fs/notify/inotify/inotify_user.c7
-rw-r--r--fs/udf/inode.c4
-rw-r--r--fs/udf/truncate.c8
-rw-r--r--fs/udf/udfdecl.h2
14 files changed, 358 insertions, 310 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e9faa52bb489..78d3257435c0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -336,12 +336,14 @@ static void blkdev_bio_end_io(struct bio *bio)
if (should_dirty) {
bio_check_pages_dirty(bio);
} else {
- struct bio_vec *bvec;
- int i;
- struct bvec_iter_all iter_all;
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+ struct bvec_iter_all iter_all;
+ struct bio_vec *bvec;
+ int i;
- bio_for_each_segment_all(bvec, bio, i, iter_all)
- put_page(bvec->bv_page);
+ bio_for_each_segment_all(bvec, bio, i, iter_all)
+ put_page(bvec->bv_page);
+ }
bio_put(bio);
}
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 217276b8b942..f9b71c12cc9f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1008,7 +1008,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
unsigned int xid;
int rc;
- if (remap_flags & ~REMAP_FILE_ADVISORY)
+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
return -EINVAL;
cifs_dbg(FYI, "clone range\n");
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 142164ef1f05..5c0298b9998f 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.18"
+#define CIFS_VERSION "2.19"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2a6d20c0ce02..89006e044973 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2632,43 +2632,56 @@ cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
struct TCP_Server_Info *server =
tlink_tcon(wdata->cfile->tlink)->ses->server;
- /*
- * Wait for credits to resend this wdata.
- * Note: we are attempting to resend the whole wdata not in segments
- */
do {
- rc = server->ops->wait_mtu_credits(server, wdata->bytes, &wsize,
- &credits);
+ if (wdata->cfile->invalidHandle) {
+ rc = cifs_reopen_file(wdata->cfile, false);
+ if (rc == -EAGAIN)
+ continue;
+ else if (rc)
+ break;
+ }
- if (rc)
- goto out;
- if (wsize < wdata->bytes) {
- add_credits_and_wake_if(server, &credits, 0);
- msleep(1000);
- }
- } while (wsize < wdata->bytes);
+ /*
+ * Wait for credits to resend this wdata.
+ * Note: we are attempting to resend the whole wdata not in
+ * segments
+ */
+ do {
+ rc = server->ops->wait_mtu_credits(server, wdata->bytes,
+ &wsize, &credits);
+ if (rc)
+ goto fail;
+
+ if (wsize < wdata->bytes) {
+ add_credits_and_wake_if(server, &credits, 0);
+ msleep(1000);
+ }
+ } while (wsize < wdata->bytes);
+ wdata->credits = credits;
- wdata->credits = credits;
- rc = -EAGAIN;
- while (rc == -EAGAIN) {
- rc = 0;
- if (wdata->cfile->invalidHandle)
- rc = cifs_reopen_file(wdata->cfile, false);
- if (!rc)
- rc = server->ops->async_writev(wdata,
+ rc = adjust_credits(server, &wdata->credits, wdata->bytes);
+
+ if (!rc) {
+ if (wdata->cfile->invalidHandle)
+ rc = -EAGAIN;
+ else
+ rc = server->ops->async_writev(wdata,
cifs_uncached_writedata_release);
- }
+ }
- if (!rc) {
- list_add_tail(&wdata->list, wdata_list);
- return 0;
- }
+ /* If the write was successfully sent, we are done */
+ if (!rc) {
+ list_add_tail(&wdata->list, wdata_list);
+ return 0;
+ }
- add_credits_and_wake_if(server, &wdata->credits, 0);
-out:
- kref_put(&wdata->refcount, cifs_uncached_writedata_release);
+ /* Roll back credits and retry if needed */
+ add_credits_and_wake_if(server, &wdata->credits, 0);
+ } while (rc == -EAGAIN);
+fail:
+ kref_put(&wdata->refcount, cifs_uncached_writedata_release);
return rc;
}
@@ -2896,12 +2909,12 @@ restart_loop:
wdata->bytes, &tmp_from,
ctx->cfile, cifs_sb, &tmp_list,
ctx);
+
+ kref_put(&wdata->refcount,
+ cifs_uncached_writedata_release);
}
list_splice(&tmp_list, &ctx->list);
-
- kref_put(&wdata->refcount,
- cifs_uncached_writedata_release);
goto restart_loop;
}
}
@@ -3348,44 +3361,55 @@ static int cifs_resend_rdata(struct cifs_readdata *rdata,
struct TCP_Server_Info *server =
tlink_tcon(rdata->cfile->tlink)->ses->server;
- /*
- * Wait for credits to resend this rdata.
- * Note: we are attempting to resend the whole rdata not in segments
- */
do {
- rc = server->ops->wait_mtu_credits(server, rdata->bytes,
+ if (rdata->cfile->invalidHandle) {
+ rc = cifs_reopen_file(rdata->cfile, true);
+ if (rc == -EAGAIN)
+ continue;
+ else if (rc)
+ break;
+ }
+
+ /*
+ * Wait for credits to resend this rdata.
+ * Note: we are attempting to resend the whole rdata not in
+ * segments
+ */
+ do {
+ rc = server->ops->wait_mtu_credits(server, rdata->bytes,
&rsize, &credits);
- if (rc)
- goto out;
+ if (rc)
+ goto fail;
- if (rsize < rdata->bytes) {
- add_credits_and_wake_if(server, &credits, 0);
- msleep(1000);
- }
- } while (rsize < rdata->bytes);
+ if (rsize < rdata->bytes) {
+ add_credits_and_wake_if(server, &credits, 0);
+ msleep(1000);
+ }
+ } while (rsize < rdata->bytes);
+ rdata->credits = credits;
- rdata->credits = credits;
- rc = -EAGAIN;
- while (rc == -EAGAIN) {
- rc = 0;
- if (rdata->cfile->invalidHandle)
- rc = cifs_reopen_file(rdata->cfile, true);
- if (!rc)
- rc = server->ops->async_readv(rdata);
- }
+ rc = adjust_credits(server, &rdata->credits, rdata->bytes);
+ if (!rc) {
+ if (rdata->cfile->invalidHandle)
+ rc = -EAGAIN;
+ else
+ rc = server->ops->async_readv(rdata);
+ }
- if (!rc) {
- /* Add to aio pending list */
- list_add_tail(&rdata->list, rdata_list);
- return 0;
- }
+ /* If the read was successfully sent, we are done */
+ if (!rc) {
+ /* Add to aio pending list */
+ list_add_tail(&rdata->list, rdata_list);
+ return 0;
+ }
- add_credits_and_wake_if(server, &rdata->credits, 0);
-out:
- kref_put(&rdata->refcount,
- cifs_uncached_readdata_release);
+ /* Roll back credits and retry if needed */
+ add_credits_and_wake_if(server, &rdata->credits, 0);
+ } while (rc == -EAGAIN);
+fail:
+ kref_put(&rdata->refcount, cifs_uncached_readdata_release);
return rc;
}
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 924269cec135..e32c264e3adb 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -1036,7 +1036,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_UNFINISHED_CONTEXT_DELETED, -EIO,
"STATUS_UNFINISHED_CONTEXT_DELETED"},
{STATUS_NO_TGT_REPLY, -EIO, "STATUS_NO_TGT_REPLY"},
- {STATUS_OBJECTID_NOT_FOUND, -EIO, "STATUS_OBJECTID_NOT_FOUND"},
+ /* Note that ENOATTTR and ENODATA are the same errno */
+ {STATUS_OBJECTID_NOT_FOUND, -ENODATA, "STATUS_OBJECTID_NOT_FOUND"},
{STATUS_NO_IP_ADDRESSES, -EIO, "STATUS_NO_IP_ADDRESSES"},
{STATUS_WRONG_CREDENTIAL_HANDLE, -EIO,
"STATUS_WRONG_CREDENTIAL_HANDLE"},
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index c399e09b76e6..21ac19ff19cb 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1628,9 +1628,16 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
iov[1].iov_base = unc_path;
iov[1].iov_len = unc_path_len;
- /* 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 */
+ /*
+ * 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1
+ * unless it is guest or anonymous user. See MS-SMB2 3.2.5.3.1
+ * (Samba servers don't always set the flag so also check if null user)
+ */
if ((ses->server->dialect == SMB311_PROT_ID) &&
- !smb3_encryption_required(tcon))
+ !smb3_encryption_required(tcon) &&
+ !(ses->session_flags &
+ (SMB2_SESSION_FLAG_IS_GUEST|SMB2_SESSION_FLAG_IS_NULL)) &&
+ ((ses->user_name != NULL) || (ses->sectype == Kerberos)))
req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
memset(&rqst, 0, sizeof(struct smb_rqst));
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index fa226de48ef3..99c4d799c24b 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -549,19 +549,19 @@ DECLARE_EVENT_CLASS(smb3_tcon_class,
__field(unsigned int, xid)
__field(__u32, tid)
__field(__u64, sesid)
- __field(const char *, unc_name)
+ __string(name, unc_name)
__field(int, rc)
),
TP_fast_assign(
__entry->xid = xid;
__entry->tid = tid;
__entry->sesid = sesid;
- __entry->unc_name = unc_name;
+ __assign_str(name, unc_name);
__entry->rc = rc;
),
TP_printk("xid=%u sid=0x%llx tid=0x%x unc_name=%s rc=%d",
__entry->xid, __entry->sesid, __entry->tid,
- __entry->unc_name, __entry->rc)
+ __get_str(name), __entry->rc)
)
#define DEFINE_SMB3_TCON_EVENT(name) \
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c88088d92613..6aaa30580a2b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -189,17 +189,28 @@ struct sqe_submit {
bool needs_fixed_file;
};
+/*
+ * First field must be the file pointer in all the
+ * iocb unions! See also 'struct kiocb' in <linux/fs.h>
+ */
struct io_poll_iocb {
struct file *file;
struct wait_queue_head *head;
__poll_t events;
- bool woken;
+ bool done;
bool canceled;
struct wait_queue_entry wait;
};
+/*
+ * NOTE! Each of the iocb union members has the file pointer
+ * as the first entry in their struct definition. So you can
+ * access the file pointer through any of the sub-structs,
+ * or directly as just 'ki_filp' in this struct.
+ */
struct io_kiocb {
union {
+ struct file *file;
struct kiocb rw;
struct io_poll_iocb poll;
};
@@ -214,6 +225,7 @@ struct io_kiocb {
#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */
#define REQ_F_FIXED_FILE 4 /* ctx owns file */
#define REQ_F_SEQ_PREV 8 /* sequential with previous */
+#define REQ_F_PREPPED 16 /* prep already done */
u64 user_data;
u64 error;
@@ -355,20 +367,25 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
}
}
-static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+{
+ if (waitqueue_active(&ctx->wait))
+ wake_up(&ctx->wait);
+ if (waitqueue_active(&ctx->sqo_wait))
+ wake_up(&ctx->sqo_wait);
+}
+
+static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
long res, unsigned ev_flags)
{
unsigned long flags;
spin_lock_irqsave(&ctx->completion_lock, flags);
- io_cqring_fill_event(ctx, ki_user_data, res, ev_flags);
+ io_cqring_fill_event(ctx, user_data, res, ev_flags);
io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
- if (waitqueue_active(&ctx->wait))
- wake_up(&ctx->wait);
- if (waitqueue_active(&ctx->sqo_wait))
- wake_up(&ctx->sqo_wait);
+ io_cqring_ev_posted(ctx);
}
static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
@@ -382,13 +399,14 @@ static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
struct io_submit_state *state)
{
+ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct io_kiocb *req;
if (!percpu_ref_tryget(&ctx->refs))
return NULL;
if (!state) {
- req = kmem_cache_alloc(req_cachep, __GFP_NOWARN);
+ req = kmem_cache_alloc(req_cachep, gfp);
if (unlikely(!req))
goto out;
} else if (!state->free_reqs) {
@@ -396,10 +414,18 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
int ret;
sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
- ret = kmem_cache_alloc_bulk(req_cachep, __GFP_NOWARN, sz,
- state->reqs);
- if (unlikely(ret <= 0))
- goto out;
+ ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
+
+ /*
+ * Bulk alloc is all-or-nothing. If we fail to get a batch,
+ * retry single alloc to be on the safe side.
+ */
+ if (unlikely(ret <= 0)) {
+ state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
+ if (!state->reqs[0])
+ goto out;
+ ret = 1;
+ }
state->free_reqs = ret - 1;
state->cur_req = 1;
req = state->reqs[0];
@@ -411,7 +437,8 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
req->ctx = ctx;
req->flags = 0;
- refcount_set(&req->refs, 0);
+ /* one is dropped after submission, the other at completion */
+ refcount_set(&req->refs, 2);
return req;
out:
io_ring_drop_ctx_refs(ctx, 1);
@@ -429,10 +456,16 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
static void io_free_req(struct io_kiocb *req)
{
- if (!refcount_read(&req->refs) || refcount_dec_and_test(&req->refs)) {
- io_ring_drop_ctx_refs(req->ctx, 1);
- kmem_cache_free(req_cachep, req);
- }
+ if (req->file && !(req->flags & REQ_F_FIXED_FILE))
+ fput(req->file);
+ io_ring_drop_ctx_refs(req->ctx, 1);
+ kmem_cache_free(req_cachep, req);
+}
+
+static void io_put_req(struct io_kiocb *req)
+{
+ if (refcount_dec_and_test(&req->refs))
+ io_free_req(req);
}
/*
@@ -442,44 +475,34 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
struct list_head *done)
{
void *reqs[IO_IOPOLL_BATCH];
- int file_count, to_free;
- struct file *file = NULL;
struct io_kiocb *req;
+ int to_free;
- file_count = to_free = 0;
+ to_free = 0;
while (!list_empty(done)) {
req = list_first_entry(done, struct io_kiocb, list);
list_del(&req->list);
io_cqring_fill_event(ctx, req->user_data, req->error, 0);
-
- reqs[to_free++] = req;
(*nr_events)++;
- /*
- * Batched puts of the same file, to avoid dirtying the
- * file usage count multiple times, if avoidable.
- */
- if (!(req->flags & REQ_F_FIXED_FILE)) {
- if (!file) {
- file = req->rw.ki_filp;
- file_count = 1;
- } else if (file == req->rw.ki_filp) {
- file_count++;
+ if (refcount_dec_and_test(&req->refs)) {
+ /* If we're not using fixed files, we have to pair the
+ * completion part with the file put. Use regular
+ * completions for those, only batch free for fixed
+ * file.
+ */
+ if (req->flags & REQ_F_FIXED_FILE) {
+ reqs[to_free++] = req;
+ if (to_free == ARRAY_SIZE(reqs))
+ io_free_req_many(ctx, reqs, &to_free);
} else {
- fput_many(file, file_count);
- file = req->rw.ki_filp;
- file_count = 1;
+ io_free_req(req);
}
}
-
- if (to_free == ARRAY_SIZE(reqs))
- io_free_req_many(ctx, reqs, &to_free);
}
- io_commit_cqring(ctx);
- if (file)
- fput_many(file, file_count);
+ io_commit_cqring(ctx);
io_free_req_many(ctx, reqs, &to_free);
}
@@ -602,21 +625,14 @@ static void kiocb_end_write(struct kiocb *kiocb)
}
}
-static void io_fput(struct io_kiocb *req)
-{
- if (!(req->flags & REQ_F_FIXED_FILE))
- fput(req->rw.ki_filp);
-}
-
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
kiocb_end_write(kiocb);
- io_fput(req);
io_cqring_add_event(req->ctx, req->user_data, res, 0);
- io_free_req(req);
+ io_put_req(req);
}
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -731,31 +747,18 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
const struct io_uring_sqe *sqe = s->sqe;
struct io_ring_ctx *ctx = req->ctx;
struct kiocb *kiocb = &req->rw;
- unsigned ioprio, flags;
- int fd, ret;
+ unsigned ioprio;
+ int ret;
+ if (!req->file)
+ return -EBADF;
/* For -EAGAIN retry, everything is already prepped */
- if (kiocb->ki_filp)
+ if (req->flags & REQ_F_PREPPED)
return 0;
- flags = READ_ONCE(sqe->flags);
- fd = READ_ONCE(sqe->fd);
+ if (force_nonblock && !io_file_supports_async(req->file))
+ force_nonblock = false;
- if (flags & IOSQE_FIXED_FILE) {
- if (unlikely(!ctx->user_files ||
- (unsigned) fd >= ctx->nr_user_files))
- return -EBADF;
- kiocb->ki_filp = ctx->user_files[fd];
- req->flags |= REQ_F_FIXED_FILE;
- } else {
- if (s->needs_fixed_file)
- return -EBADF;
- kiocb->ki_filp = io_file_get(state, fd);
- if (unlikely(!kiocb->ki_filp))
- return -EBADF;
- if (force_nonblock && !io_file_supports_async(kiocb->ki_filp))
- force_nonblock = false;
- }
kiocb->ki_pos = READ_ONCE(sqe->off);
kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
@@ -764,7 +767,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
if (ioprio) {
ret = ioprio_check_cap(ioprio);
if (ret)
- goto out_fput;
+ return ret;
kiocb->ki_ioprio = ioprio;
} else
@@ -772,38 +775,26 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
if (unlikely(ret))
- goto out_fput;
+ return ret;
if (force_nonblock) {
kiocb->ki_flags |= IOCB_NOWAIT;
req->flags |= REQ_F_FORCE_NONBLOCK;
}
if (ctx->flags & IORING_SETUP_IOPOLL) {
- ret = -EOPNOTSUPP;
if (!(kiocb->ki_flags & IOCB_DIRECT) ||
!kiocb->ki_filp->f_op->iopoll)
- goto out_fput;
+ return -EOPNOTSUPP;
req->error = 0;
kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll;
} else {
- if (kiocb->ki_flags & IOCB_HIPRI) {
- ret = -EINVAL;
- goto out_fput;
- }
+ if (kiocb->ki_flags & IOCB_HIPRI)
+ return -EINVAL;
kiocb->ki_complete = io_complete_rw;
}
+ req->flags |= REQ_F_PREPPED;
return 0;
-out_fput:
- if (!(flags & IOSQE_FIXED_FILE)) {
- /*
- * in case of error, we didn't use this file reference. drop it.
- */
- if (state)
- state->used_refs--;
- io_file_put(state, kiocb->ki_filp);
- }
- return ret;
}
static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
@@ -864,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
if (offset)
iov_iter_advance(iter, offset);
+
+ /* don't drop a reference to these pages */
+ iter->type |= ITER_BVEC_FLAG_NO_REF;
return 0;
}
@@ -887,7 +881,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
opcode = READ_ONCE(sqe->opcode);
if (opcode == IORING_OP_READ_FIXED ||
opcode == IORING_OP_WRITE_FIXED) {
- ssize_t ret = io_import_fixed(ctx, rw, sqe, iter);
+ int ret = io_import_fixed(ctx, rw, sqe, iter);
*iovec = NULL;
return ret;
}
@@ -945,31 +939,29 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
async_list->io_end = io_end;
}
-static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
+ bool force_nonblock, struct io_submit_state *state)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
struct iov_iter iter;
struct file *file;
size_t iov_count;
- ssize_t ret;
+ int ret;
ret = io_prep_rw(req, s, force_nonblock, state);
if (ret)
return ret;
file = kiocb->ki_filp;
- ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
- goto out_fput;
- ret = -EINVAL;
+ return -EBADF;
if (unlikely(!file->f_op->read_iter))
- goto out_fput;
+ return -EINVAL;
ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
if (ret)
- goto out_fput;
+ return ret;
iov_count = iov_iter_count(&iter);
ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
@@ -991,38 +983,32 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
}
}
kfree(iovec);
-out_fput:
- /* Hold on to the file for -EAGAIN */
- if (unlikely(ret && ret != -EAGAIN))
- io_fput(req);
return ret;
}
-static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
+ bool force_nonblock, struct io_submit_state *state)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
struct iov_iter iter;
struct file *file;
size_t iov_count;
- ssize_t ret;
+ int ret;
ret = io_prep_rw(req, s, force_nonblock, state);
if (ret)
return ret;
- ret = -EBADF;
file = kiocb->ki_filp;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
- goto out_fput;
- ret = -EINVAL;
+ return -EBADF;
if (unlikely(!file->f_op->write_iter))
- goto out_fput;
+ return -EINVAL;
ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
if (ret)
- goto out_fput;
+ return ret;
iov_count = iov_iter_count(&iter);
@@ -1054,10 +1040,6 @@ static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
}
out_free:
kfree(iovec);
-out_fput:
- /* Hold on to the file for -EAGAIN */
- if (unlikely(ret && ret != -EAGAIN))
- io_fput(req);
return ret;
}
@@ -1072,29 +1054,19 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- /*
- * Twilight zone - it's possible that someone issued an opcode that
- * has a file attached, then got -EAGAIN on submission, and changed
- * the sqe before we retried it from async context. Avoid dropping
- * a file reference for this malicious case, and flag the error.
- */
- if (req->rw.ki_filp) {
- err = -EBADF;
- io_fput(req);
- }
io_cqring_add_event(ctx, user_data, err, 0);
- io_free_req(req);
+ io_put_req(req);
return 0;
}
static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
- unsigned flags;
- int fd;
- /* Prep already done */
- if (req->rw.ki_filp)
+ if (!req->file)
+ return -EBADF;
+ /* Prep already done (EAGAIN retry) */
+ if (req->flags & REQ_F_PREPPED)
return 0;
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
@@ -1102,20 +1074,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
return -EINVAL;
- fd = READ_ONCE(sqe->fd);
- flags = READ_ONCE(sqe->flags);
-
- if (flags & IOSQE_FIXED_FILE) {
- if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
- return -EBADF;
- req->rw.ki_filp = ctx->user_files[fd];
- req->flags |= REQ_F_FIXED_FILE;
- } else {
- req->rw.ki_filp = fget(fd);
- if (unlikely(!req->rw.ki_filp))
- return -EBADF;
- }
-
+ req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -1144,9 +1103,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
end > 0 ? end : LLONG_MAX,
fsync_flags & IORING_FSYNC_DATASYNC);
- io_fput(req);
io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
- io_free_req(req);
+ io_put_req(req);
return 0;
}
@@ -1204,15 +1162,16 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
spin_unlock_irq(&ctx->completion_lock);
io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
- io_free_req(req);
+ io_put_req(req);
return 0;
}
-static void io_poll_complete(struct io_kiocb *req, __poll_t mask)
+static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ __poll_t mask)
{
- io_cqring_add_event(req->ctx, req->user_data, mangle_poll(mask), 0);
- io_fput(req);
- io_free_req(req);
+ req->poll.done = true;
+ io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0);
+ io_commit_cqring(ctx);
}
static void io_poll_complete_work(struct work_struct *work)
@@ -1240,9 +1199,11 @@ static void io_poll_complete_work(struct work_struct *work)
return;
}
list_del_init(&req->list);
+ io_poll_complete(ctx, req, mask);
spin_unlock_irq(&ctx->completion_lock);
- io_poll_complete(req, mask);
+ io_cqring_ev_posted(ctx);
+ io_put_req(req);
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -1253,29 +1214,25 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
struct io_ring_ctx *ctx = req->ctx;
__poll_t mask = key_to_poll(key);
-
- poll->woken = true;
+ unsigned long flags;
/* for instances that support it check for an event match first: */
- if (mask) {
- unsigned long flags;
+ if (mask && !(mask & poll->events))
+ return 0;
- if (!(mask & poll->events))
- return 0;
+ list_del_init(&poll->wait.entry);
- /* try to complete the iocb inline if we can: */
- if (spin_trylock_irqsave(&ctx->completion_lock, flags)) {
- list_del(&req->list);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+ list_del(&req->list);
+ io_poll_complete(ctx, req, mask);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
- list_del_init(&poll->wait.entry);
- io_poll_complete(req, mask);
- return 1;
- }
+ io_cqring_ev_posted(ctx);
+ io_put_req(req);
+ } else {
+ queue_work(ctx->sqo_wq, &req->work);
}
- list_del_init(&poll->wait.entry);
- queue_work(ctx->sqo_wq, &req->work);
return 1;
}
@@ -1305,36 +1262,23 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_poll_iocb *poll = &req->poll;
struct io_ring_ctx *ctx = req->ctx;
struct io_poll_table ipt;
- unsigned flags;
+ bool cancel = false;
__poll_t mask;
u16 events;
- int fd;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
return -EINVAL;
+ if (!poll->file)
+ return -EBADF;
INIT_WORK(&req->work, io_poll_complete_work);
events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
- flags = READ_ONCE(sqe->flags);
- fd = READ_ONCE(sqe->fd);
-
- if (flags & IOSQE_FIXED_FILE) {
- if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
- return -EBADF;
- poll->file = ctx->user_files[fd];
- req->flags |= REQ_F_FIXED_FILE;
- } else {
- poll->file = fget(fd);
- }
- if (unlikely(!poll->file))
- return -EBADF;
-
poll->head = NULL;
- poll->woken = false;
+ poll->done = false;
poll->canceled = false;
ipt.pt._qproc = io_poll_queue_proc;
@@ -1346,56 +1290,44 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
INIT_LIST_HEAD(&poll->wait.entry);
init_waitqueue_func_entry(&poll->wait, io_poll_wake);
- /* one for removal from waitqueue, one for this function */
- refcount_set(&req->refs, 2);
-
mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
- if (unlikely(!poll->head)) {
- /* we did not manage to set up a waitqueue, done */
- goto out;
- }
spin_lock_irq(&ctx->completion_lock);
- spin_lock(&poll->head->lock);
- if (poll->woken) {
- /* wake_up context handles the rest */
- mask = 0;
+ if (likely(poll->head)) {
+ spin_lock(&poll->head->lock);
+ if (unlikely(list_empty(&poll->wait.entry))) {
+ if (ipt.error)
+ cancel = true;
+ ipt.error = 0;
+ mask = 0;
+ }
+ if (mask || ipt.error)
+ list_del_init(&poll->wait.entry);
+ else if (cancel)
+ WRITE_ONCE(poll->canceled, true);
+ else if (!poll->done) /* actually waiting for an event */
+ list_add_tail(&req->list, &ctx->cancel_list);
+ spin_unlock(&poll->head->lock);
+ }
+ if (mask) { /* no async, we'd stolen it */
+ req->error = mangle_poll(mask);
ipt.error = 0;
- } else if (mask || ipt.error) {
- /* if we get an error or a mask we are done */
- WARN_ON_ONCE(list_empty(&poll->wait.entry));
- list_del_init(&poll->wait.entry);
- } else {
- /* actually waiting for an event */
- list_add_tail(&req->list, &ctx->cancel_list);
+ io_poll_complete(ctx, req, mask);
}
- spin_unlock(&poll->head->lock);
spin_unlock_irq(&ctx->completion_lock);
-out:
- if (unlikely(ipt.error)) {
- if (!(flags & IOSQE_FIXED_FILE))
- fput(poll->file);
- /*
- * Drop one of our refs to this req, __io_submit_sqe() will
- * drop the other one since we're returning an error.
- */
- io_free_req(req);
- return ipt.error;
+ if (mask) {
+ io_cqring_ev_posted(ctx);
+ io_put_req(req);
}
-
- if (mask)
- io_poll_complete(req, mask);
- io_free_req(req);
- return 0;
+ return ipt.error;
}
static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct sqe_submit *s, bool force_nonblock,
struct io_submit_state *state)
{
- ssize_t ret;
- int opcode;
+ int ret, opcode;
if (unlikely(s->index >= ctx->sq_entries))
return -EINVAL;
@@ -1524,10 +1456,13 @@ restart:
break;
cond_resched();
} while (1);
+
+ /* drop submission reference */
+ io_put_req(req);
}
if (ret) {
io_cqring_add_event(ctx, sqe->user_data, ret, 0);
- io_free_req(req);
+ io_put_req(req);
}
/* async context always use a copy of the sqe */
@@ -1614,11 +1549,55 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
return ret;
}
+static bool io_op_needs_file(const struct io_uring_sqe *sqe)
+{
+ int op = READ_ONCE(sqe->opcode);
+
+ switch (op) {
+ case IORING_OP_NOP:
+ case IORING_OP_POLL_REMOVE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
+ struct io_submit_state *state, struct io_kiocb *req)
+{
+ unsigned flags;
+ int fd;
+
+ flags = READ_ONCE(s->sqe->flags);
+ fd = READ_ONCE(s->sqe->fd);
+
+ if (!io_op_needs_file(s->sqe)) {
+ req->file = NULL;
+ return 0;
+ }
+
+ if (flags & IOSQE_FIXED_FILE) {
+ if (unlikely(!ctx->user_files ||
+ (unsigned) fd >= ctx->nr_user_files))
+ return -EBADF;
+ req->file = ctx->user_files[fd];
+ req->flags |= REQ_F_FIXED_FILE;
+ } else {
+ if (s->needs_fixed_file)
+ return -EBADF;
+ req->file = io_file_get(state, fd);
+ if (unlikely(!req->file))
+ return -EBADF;
+ }
+
+ return 0;
+}
+
static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
struct io_submit_state *state)
{
struct io_kiocb *req;
- ssize_t ret;
+ int ret;
/* enforce forwards compatibility on users */
if (unlikely(s->sqe->flags & ~IOSQE_FIXED_FILE))
@@ -1628,7 +1607,9 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
if (unlikely(!req))
return -EAGAIN;
- req->rw.ki_filp = NULL;
+ ret = io_req_set_file(ctx, s, state, req);
+ if (unlikely(ret))
+ goto out;
ret = __io_submit_sqe(ctx, req, s, true, state);
if (ret == -EAGAIN) {
@@ -1649,11 +1630,23 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
INIT_WORK(&req->work, io_sq_wq_submit_work);
queue_work(ctx->sqo_wq, &req->work);
}
- ret = 0;
+
+ /*
+ * Queued up for async execution, worker will release
+ * submit reference when the iocb is actually
+ * submitted.
+ */
+ return 0;
}
}
+
+out:
+ /* drop submission reference */
+ io_put_req(req);
+
+ /* and drop final reference, if we failed */
if (ret)
- io_free_req(req);
+ io_put_req(req);
return ret;
}
diff --git a/fs/iomap.c b/fs/iomap.c
index 97cb9d486a7d..abdd18e404f8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1589,12 +1589,14 @@ static void iomap_dio_bio_end_io(struct bio *bio)
if (should_dirty) {
bio_check_pages_dirty(bio);
} else {
- struct bio_vec *bvec;
- int i;
- struct bvec_iter_all iter_all;
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+ struct bvec_iter_all iter_all;
+ struct bio_vec *bvec;
+ int i;
- bio_for_each_segment_all(bvec, bio, i, iter_all)
- put_page(bvec->bv_page);
+ bio_for_each_segment_all(bvec, bio, i, iter_all)
+ put_page(bvec->bv_page);
+ }
bio_put(bio);
}
}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 56992b32c6bb..a90bb19dcfa2 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -208,6 +208,7 @@ static int copy_fid_to_user(struct fanotify_event *event, char __user *buf)
{
struct fanotify_event_info_fid info = { };
struct file_handle handle = { };
+ unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh;
size_t fh_len = event->fh_len;
size_t len = fanotify_event_info_len(event);
@@ -233,7 +234,16 @@ static int copy_fid_to_user(struct fanotify_event *event, char __user *buf)
buf += sizeof(handle);
len -= sizeof(handle);
- if (copy_to_user(buf, fanotify_event_fh(event), fh_len))
+ /*
+ * For an inline fh, copy through stack to exclude the copy from
+ * usercopy hardening protections.
+ */
+ fh = fanotify_event_fh(event);
+ if (fh_len <= FANOTIFY_INLINE_FH_LEN) {
+ memcpy(bounce, fh, fh_len);
+ fh = bounce;
+ }
+ if (copy_to_user(buf, fh, fh_len))
return -EFAULT;
/* Pad with 0's */
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index e2901fbb9f76..7b53598c8804 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -519,8 +519,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
if (!fsn_mark)
return -ENOENT;
- else if (create)
- return -EEXIST;
+ else if (create) {
+ ret = -EEXIST;
+ goto out;
+ }
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
@@ -548,6 +550,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
/* return the wd */
ret = i_mark->wd;
+out:
/* match the get from fsnotify_find_mark() */
fsnotify_put_mark(fsn_mark);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index ae796e10f68b..e7276932e433 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1242,8 +1242,10 @@ set_size:
truncate_setsize(inode, newsize);
down_write(&iinfo->i_data_sem);
udf_clear_extent_cache(inode);
- udf_truncate_extents(inode);
+ err = udf_truncate_extents(inode);
up_write(&iinfo->i_data_sem);
+ if (err)
+ return err;
}
update_time:
inode->i_mtime = inode->i_ctime = current_time(inode);
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index b647f0bd150c..63a47f1e1d52 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -199,7 +199,7 @@ static void udf_update_alloc_ext_desc(struct inode *inode,
* for making file shorter. For making file longer, udf_extend_file() has to
* be used.
*/
-void udf_truncate_extents(struct inode *inode)
+int udf_truncate_extents(struct inode *inode)
{
struct extent_position epos;
struct kernel_lb_addr eloc, neloc = {};
@@ -224,7 +224,7 @@ void udf_truncate_extents(struct inode *inode)
if (etype == -1) {
/* We should extend the file? */
WARN_ON(byte_offset);
- return;
+ return 0;
}
epos.offset -= adsize;
extent_trunc(inode, &epos, &eloc, etype, elen, byte_offset);
@@ -260,6 +260,9 @@ void udf_truncate_extents(struct inode *inode)
epos.block = eloc;
epos.bh = udf_tread(sb,
udf_get_lb_pblock(sb, &eloc, 0));
+ /* Error reading indirect block? */
+ if (!epos.bh)
+ return -EIO;
if (elen)
indirect_ext_len =
(elen + sb->s_blocksize - 1) >>
@@ -283,4 +286,5 @@ void udf_truncate_extents(struct inode *inode)
iinfo->i_lenExtents = inode->i_size;
brelse(epos.bh);
+ return 0;
}
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index ee246769dee4..d89ef71887fc 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -235,7 +235,7 @@ extern struct inode *udf_new_inode(struct inode *, umode_t);
/* truncate.c */
extern void udf_truncate_tail_extent(struct inode *);
extern void udf_discard_prealloc(struct inode *);
-extern void udf_truncate_extents(struct inode *);
+extern int udf_truncate_extents(struct inode *);
/* balloc.c */
extern void udf_free_blocks(struct super_block *, struct inode *,