diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/block_dev.c | 12 | ||||
-rw-r--r-- | fs/cifs/cifsfs.c | 2 | ||||
-rw-r--r-- | fs/cifs/cifsfs.h | 2 | ||||
-rw-r--r-- | fs/cifs/file.c | 148 | ||||
-rw-r--r-- | fs/cifs/smb2maperror.c | 3 | ||||
-rw-r--r-- | fs/cifs/smb2pdu.c | 11 | ||||
-rw-r--r-- | fs/cifs/trace.h | 6 | ||||
-rw-r--r-- | fs/io_uring.c | 439 | ||||
-rw-r--r-- | fs/iomap.c | 12 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 12 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_user.c | 7 | ||||
-rw-r--r-- | fs/udf/inode.c | 4 | ||||
-rw-r--r-- | fs/udf/truncate.c | 8 | ||||
-rw-r--r-- | fs/udf/udfdecl.h | 2 |
14 files changed, 358 insertions, 310 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index e9faa52bb489..78d3257435c0 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -336,12 +336,14 @@ static void blkdev_bio_end_io(struct bio *bio) if (should_dirty) { bio_check_pages_dirty(bio); } else { - struct bio_vec *bvec; - int i; - struct bvec_iter_all iter_all; + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { + struct bvec_iter_all iter_all; + struct bio_vec *bvec; + int i; - bio_for_each_segment_all(bvec, bio, i, iter_all) - put_page(bvec->bv_page); + bio_for_each_segment_all(bvec, bio, i, iter_all) + put_page(bvec->bv_page); + } bio_put(bio); } } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 217276b8b942..f9b71c12cc9f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1008,7 +1008,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, unsigned int xid; int rc; - if (remap_flags & ~REMAP_FILE_ADVISORY) + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; cifs_dbg(FYI, "clone range\n"); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 142164ef1f05..5c0298b9998f 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.18" +#define CIFS_VERSION "2.19" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 2a6d20c0ce02..89006e044973 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2632,43 +2632,56 @@ cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, struct TCP_Server_Info *server = tlink_tcon(wdata->cfile->tlink)->ses->server; - /* - * Wait for credits to resend this wdata. - * Note: we are attempting to resend the whole wdata not in segments - */ do { - rc = server->ops->wait_mtu_credits(server, wdata->bytes, &wsize, - &credits); + if (wdata->cfile->invalidHandle) { + rc = cifs_reopen_file(wdata->cfile, false); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } - if (rc) - goto out; - if (wsize < wdata->bytes) { - add_credits_and_wake_if(server, &credits, 0); - msleep(1000); - } - } while (wsize < wdata->bytes); + /* + * Wait for credits to resend this wdata. + * Note: we are attempting to resend the whole wdata not in + * segments + */ + do { + rc = server->ops->wait_mtu_credits(server, wdata->bytes, + &wsize, &credits); + if (rc) + goto fail; + + if (wsize < wdata->bytes) { + add_credits_and_wake_if(server, &credits, 0); + msleep(1000); + } + } while (wsize < wdata->bytes); + wdata->credits = credits; - wdata->credits = credits; - rc = -EAGAIN; - while (rc == -EAGAIN) { - rc = 0; - if (wdata->cfile->invalidHandle) - rc = cifs_reopen_file(wdata->cfile, false); - if (!rc) - rc = server->ops->async_writev(wdata, + rc = adjust_credits(server, &wdata->credits, wdata->bytes); + + if (!rc) { + if (wdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_writev(wdata, cifs_uncached_writedata_release); - } + } - if (!rc) { - list_add_tail(&wdata->list, wdata_list); - return 0; - } + /* If the write was successfully sent, we are done */ + if (!rc) { + list_add_tail(&wdata->list, wdata_list); + return 0; + } - add_credits_and_wake_if(server, &wdata->credits, 0); -out: - kref_put(&wdata->refcount, cifs_uncached_writedata_release); + /* Roll back credits and retry if needed */ + add_credits_and_wake_if(server, &wdata->credits, 0); + } while (rc == -EAGAIN); +fail: + kref_put(&wdata->refcount, cifs_uncached_writedata_release); return rc; } @@ -2896,12 +2909,12 @@ restart_loop: wdata->bytes, &tmp_from, ctx->cfile, cifs_sb, &tmp_list, ctx); + + kref_put(&wdata->refcount, + cifs_uncached_writedata_release); } list_splice(&tmp_list, &ctx->list); - - kref_put(&wdata->refcount, - cifs_uncached_writedata_release); goto restart_loop; } } @@ -3348,44 +3361,55 @@ static int cifs_resend_rdata(struct cifs_readdata *rdata, struct TCP_Server_Info *server = tlink_tcon(rdata->cfile->tlink)->ses->server; - /* - * Wait for credits to resend this rdata. - * Note: we are attempting to resend the whole rdata not in segments - */ do { - rc = server->ops->wait_mtu_credits(server, rdata->bytes, + if (rdata->cfile->invalidHandle) { + rc = cifs_reopen_file(rdata->cfile, true); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } + + /* + * Wait for credits to resend this rdata. + * Note: we are attempting to resend the whole rdata not in + * segments + */ + do { + rc = server->ops->wait_mtu_credits(server, rdata->bytes, &rsize, &credits); - if (rc) - goto out; + if (rc) + goto fail; - if (rsize < rdata->bytes) { - add_credits_and_wake_if(server, &credits, 0); - msleep(1000); - } - } while (rsize < rdata->bytes); + if (rsize < rdata->bytes) { + add_credits_and_wake_if(server, &credits, 0); + msleep(1000); + } + } while (rsize < rdata->bytes); + rdata->credits = credits; - rdata->credits = credits; - rc = -EAGAIN; - while (rc == -EAGAIN) { - rc = 0; - if (rdata->cfile->invalidHandle) - rc = cifs_reopen_file(rdata->cfile, true); - if (!rc) - rc = server->ops->async_readv(rdata); - } + rc = adjust_credits(server, &rdata->credits, rdata->bytes); + if (!rc) { + if (rdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_readv(rdata); + } - if (!rc) { - /* Add to aio pending list */ - list_add_tail(&rdata->list, rdata_list); - return 0; - } + /* If the read was successfully sent, we are done */ + if (!rc) { + /* Add to aio pending list */ + list_add_tail(&rdata->list, rdata_list); + return 0; + } - add_credits_and_wake_if(server, &rdata->credits, 0); -out: - kref_put(&rdata->refcount, - cifs_uncached_readdata_release); + /* Roll back credits and retry if needed */ + add_credits_and_wake_if(server, &rdata->credits, 0); + } while (rc == -EAGAIN); +fail: + kref_put(&rdata->refcount, cifs_uncached_readdata_release); return rc; } diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 924269cec135..e32c264e3adb 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -1036,7 +1036,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_UNFINISHED_CONTEXT_DELETED, -EIO, "STATUS_UNFINISHED_CONTEXT_DELETED"}, {STATUS_NO_TGT_REPLY, -EIO, "STATUS_NO_TGT_REPLY"}, - {STATUS_OBJECTID_NOT_FOUND, -EIO, "STATUS_OBJECTID_NOT_FOUND"}, + /* Note that ENOATTTR and ENODATA are the same errno */ + {STATUS_OBJECTID_NOT_FOUND, -ENODATA, "STATUS_OBJECTID_NOT_FOUND"}, {STATUS_NO_IP_ADDRESSES, -EIO, "STATUS_NO_IP_ADDRESSES"}, {STATUS_WRONG_CREDENTIAL_HANDLE, -EIO, "STATUS_WRONG_CREDENTIAL_HANDLE"}, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index c399e09b76e6..21ac19ff19cb 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1628,9 +1628,16 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, iov[1].iov_base = unc_path; iov[1].iov_len = unc_path_len; - /* 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 */ + /* + * 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 + * unless it is guest or anonymous user. See MS-SMB2 3.2.5.3.1 + * (Samba servers don't always set the flag so also check if null user) + */ if ((ses->server->dialect == SMB311_PROT_ID) && - !smb3_encryption_required(tcon)) + !smb3_encryption_required(tcon) && + !(ses->session_flags & + (SMB2_SESSION_FLAG_IS_GUEST|SMB2_SESSION_FLAG_IS_NULL)) && + ((ses->user_name != NULL) || (ses->sectype == Kerberos))) req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; memset(&rqst, 0, sizeof(struct smb_rqst)); diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index fa226de48ef3..99c4d799c24b 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -549,19 +549,19 @@ DECLARE_EVENT_CLASS(smb3_tcon_class, __field(unsigned int, xid) __field(__u32, tid) __field(__u64, sesid) - __field(const char *, unc_name) + __string(name, unc_name) __field(int, rc) ), TP_fast_assign( __entry->xid = xid; __entry->tid = tid; __entry->sesid = sesid; - __entry->unc_name = unc_name; + __assign_str(name, unc_name); __entry->rc = rc; ), TP_printk("xid=%u sid=0x%llx tid=0x%x unc_name=%s rc=%d", __entry->xid, __entry->sesid, __entry->tid, - __entry->unc_name, __entry->rc) + __get_str(name), __entry->rc) ) #define DEFINE_SMB3_TCON_EVENT(name) \ diff --git a/fs/io_uring.c b/fs/io_uring.c index c88088d92613..6aaa30580a2b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -189,17 +189,28 @@ struct sqe_submit { bool needs_fixed_file; }; +/* + * First field must be the file pointer in all the + * iocb unions! See also 'struct kiocb' in <linux/fs.h> + */ struct io_poll_iocb { struct file *file; struct wait_queue_head *head; __poll_t events; - bool woken; + bool done; bool canceled; struct wait_queue_entry wait; }; +/* + * NOTE! Each of the iocb union members has the file pointer + * as the first entry in their struct definition. So you can + * access the file pointer through any of the sub-structs, + * or directly as just 'ki_filp' in this struct. + */ struct io_kiocb { union { + struct file *file; struct kiocb rw; struct io_poll_iocb poll; }; @@ -214,6 +225,7 @@ struct io_kiocb { #define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */ #define REQ_F_FIXED_FILE 4 /* ctx owns file */ #define REQ_F_SEQ_PREV 8 /* sequential with previous */ +#define REQ_F_PREPPED 16 /* prep already done */ u64 user_data; u64 error; @@ -355,20 +367,25 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data, } } -static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 ki_user_data, +static void io_cqring_ev_posted(struct io_ring_ctx *ctx) +{ + if (waitqueue_active(&ctx->wait)) + wake_up(&ctx->wait); + if (waitqueue_active(&ctx->sqo_wait)) + wake_up(&ctx->sqo_wait); +} + +static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data, long res, unsigned ev_flags) { unsigned long flags; spin_lock_irqsave(&ctx->completion_lock, flags); - io_cqring_fill_event(ctx, ki_user_data, res, ev_flags); + io_cqring_fill_event(ctx, user_data, res, ev_flags); io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); - if (waitqueue_active(&ctx->wait)) - wake_up(&ctx->wait); - if (waitqueue_active(&ctx->sqo_wait)) - wake_up(&ctx->sqo_wait); + io_cqring_ev_posted(ctx); } static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs) @@ -382,13 +399,14 @@ static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs) static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, struct io_submit_state *state) { + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; struct io_kiocb *req; if (!percpu_ref_tryget(&ctx->refs)) return NULL; if (!state) { - req = kmem_cache_alloc(req_cachep, __GFP_NOWARN); + req = kmem_cache_alloc(req_cachep, gfp); if (unlikely(!req)) goto out; } else if (!state->free_reqs) { @@ -396,10 +414,18 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, int ret; sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs)); - ret = kmem_cache_alloc_bulk(req_cachep, __GFP_NOWARN, sz, - state->reqs); - if (unlikely(ret <= 0)) - goto out; + ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs); + + /* + * Bulk alloc is all-or-nothing. If we fail to get a batch, + * retry single alloc to be on the safe side. + */ + if (unlikely(ret <= 0)) { + state->reqs[0] = kmem_cache_alloc(req_cachep, gfp); + if (!state->reqs[0]) + goto out; + ret = 1; + } state->free_reqs = ret - 1; state->cur_req = 1; req = state->reqs[0]; @@ -411,7 +437,8 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, req->ctx = ctx; req->flags = 0; - refcount_set(&req->refs, 0); + /* one is dropped after submission, the other at completion */ + refcount_set(&req->refs, 2); return req; out: io_ring_drop_ctx_refs(ctx, 1); @@ -429,10 +456,16 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr) static void io_free_req(struct io_kiocb *req) { - if (!refcount_read(&req->refs) || refcount_dec_and_test(&req->refs)) { - io_ring_drop_ctx_refs(req->ctx, 1); - kmem_cache_free(req_cachep, req); - } + if (req->file && !(req->flags & REQ_F_FIXED_FILE)) + fput(req->file); + io_ring_drop_ctx_refs(req->ctx, 1); + kmem_cache_free(req_cachep, req); +} + +static void io_put_req(struct io_kiocb *req) +{ + if (refcount_dec_and_test(&req->refs)) + io_free_req(req); } /* @@ -442,44 +475,34 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, struct list_head *done) { void *reqs[IO_IOPOLL_BATCH]; - int file_count, to_free; - struct file *file = NULL; struct io_kiocb *req; + int to_free; - file_count = to_free = 0; + to_free = 0; while (!list_empty(done)) { req = list_first_entry(done, struct io_kiocb, list); list_del(&req->list); io_cqring_fill_event(ctx, req->user_data, req->error, 0); - - reqs[to_free++] = req; (*nr_events)++; - /* - * Batched puts of the same file, to avoid dirtying the - * file usage count multiple times, if avoidable. - */ - if (!(req->flags & REQ_F_FIXED_FILE)) { - if (!file) { - file = req->rw.ki_filp; - file_count = 1; - } else if (file == req->rw.ki_filp) { - file_count++; + if (refcount_dec_and_test(&req->refs)) { + /* If we're not using fixed files, we have to pair the + * completion part with the file put. Use regular + * completions for those, only batch free for fixed + * file. + */ + if (req->flags & REQ_F_FIXED_FILE) { + reqs[to_free++] = req; + if (to_free == ARRAY_SIZE(reqs)) + io_free_req_many(ctx, reqs, &to_free); } else { - fput_many(file, file_count); - file = req->rw.ki_filp; - file_count = 1; + io_free_req(req); } } - - if (to_free == ARRAY_SIZE(reqs)) - io_free_req_many(ctx, reqs, &to_free); } - io_commit_cqring(ctx); - if (file) - fput_many(file, file_count); + io_commit_cqring(ctx); io_free_req_many(ctx, reqs, &to_free); } @@ -602,21 +625,14 @@ static void kiocb_end_write(struct kiocb *kiocb) } } -static void io_fput(struct io_kiocb *req) -{ - if (!(req->flags & REQ_F_FIXED_FILE)) - fput(req->rw.ki_filp); -} - static void io_complete_rw(struct kiocb *kiocb, long res, long res2) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); kiocb_end_write(kiocb); - io_fput(req); io_cqring_add_event(req->ctx, req->user_data, res, 0); - io_free_req(req); + io_put_req(req); } static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) @@ -731,31 +747,18 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, const struct io_uring_sqe *sqe = s->sqe; struct io_ring_ctx *ctx = req->ctx; struct kiocb *kiocb = &req->rw; - unsigned ioprio, flags; - int fd, ret; + unsigned ioprio; + int ret; + if (!req->file) + return -EBADF; /* For -EAGAIN retry, everything is already prepped */ - if (kiocb->ki_filp) + if (req->flags & REQ_F_PREPPED) return 0; - flags = READ_ONCE(sqe->flags); - fd = READ_ONCE(sqe->fd); + if (force_nonblock && !io_file_supports_async(req->file)) + force_nonblock = false; - if (flags & IOSQE_FIXED_FILE) { - if (unlikely(!ctx->user_files || - (unsigned) fd >= ctx->nr_user_files)) - return -EBADF; - kiocb->ki_filp = ctx->user_files[fd]; - req->flags |= REQ_F_FIXED_FILE; - } else { - if (s->needs_fixed_file) - return -EBADF; - kiocb->ki_filp = io_file_get(state, fd); - if (unlikely(!kiocb->ki_filp)) - return -EBADF; - if (force_nonblock && !io_file_supports_async(kiocb->ki_filp)) - force_nonblock = false; - } kiocb->ki_pos = READ_ONCE(sqe->off); kiocb->ki_flags = iocb_flags(kiocb->ki_filp); kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); @@ -764,7 +767,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, if (ioprio) { ret = ioprio_check_cap(ioprio); if (ret) - goto out_fput; + return ret; kiocb->ki_ioprio = ioprio; } else @@ -772,38 +775,26 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); if (unlikely(ret)) - goto out_fput; + return ret; if (force_nonblock) { kiocb->ki_flags |= IOCB_NOWAIT; req->flags |= REQ_F_FORCE_NONBLOCK; } if (ctx->flags & IORING_SETUP_IOPOLL) { - ret = -EOPNOTSUPP; if (!(kiocb->ki_flags & IOCB_DIRECT) || !kiocb->ki_filp->f_op->iopoll) - goto out_fput; + return -EOPNOTSUPP; req->error = 0; kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; } else { - if (kiocb->ki_flags & IOCB_HIPRI) { - ret = -EINVAL; - goto out_fput; - } + if (kiocb->ki_flags & IOCB_HIPRI) + return -EINVAL; kiocb->ki_complete = io_complete_rw; } + req->flags |= REQ_F_PREPPED; return 0; -out_fput: - if (!(flags & IOSQE_FIXED_FILE)) { - /* - * in case of error, we didn't use this file reference. drop it. - */ - if (state) - state->used_refs--; - io_file_put(state, kiocb->ki_filp); - } - return ret; } static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) @@ -864,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); if (offset) iov_iter_advance(iter, offset); + + /* don't drop a reference to these pages */ + iter->type |= ITER_BVEC_FLAG_NO_REF; return 0; } @@ -887,7 +881,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw, opcode = READ_ONCE(sqe->opcode); if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { - ssize_t ret = io_import_fixed(ctx, rw, sqe, iter); + int ret = io_import_fixed(ctx, rw, sqe, iter); *iovec = NULL; return ret; } @@ -945,31 +939,29 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len) async_list->io_end = io_end; } -static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock, struct io_submit_state *state) +static int io_read(struct io_kiocb *req, const struct sqe_submit *s, + bool force_nonblock, struct io_submit_state *state) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw; struct iov_iter iter; struct file *file; size_t iov_count; - ssize_t ret; + int ret; ret = io_prep_rw(req, s, force_nonblock, state); if (ret) return ret; file = kiocb->ki_filp; - ret = -EBADF; if (unlikely(!(file->f_mode & FMODE_READ))) - goto out_fput; - ret = -EINVAL; + return -EBADF; if (unlikely(!file->f_op->read_iter)) - goto out_fput; + return -EINVAL; ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter); if (ret) - goto out_fput; + return ret; iov_count = iov_iter_count(&iter); ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count); @@ -991,38 +983,32 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s, } } kfree(iovec); -out_fput: - /* Hold on to the file for -EAGAIN */ - if (unlikely(ret && ret != -EAGAIN)) - io_fput(req); return ret; } -static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock, struct io_submit_state *state) +static int io_write(struct io_kiocb *req, const struct sqe_submit *s, + bool force_nonblock, struct io_submit_state *state) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw; struct iov_iter iter; struct file *file; size_t iov_count; - ssize_t ret; + int ret; ret = io_prep_rw(req, s, force_nonblock, state); if (ret) return ret; - ret = -EBADF; file = kiocb->ki_filp; if (unlikely(!(file->f_mode & FMODE_WRITE))) - goto out_fput; - ret = -EINVAL; + return -EBADF; if (unlikely(!file->f_op->write_iter)) - goto out_fput; + return -EINVAL; ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter); if (ret) - goto out_fput; + return ret; iov_count = iov_iter_count(&iter); @@ -1054,10 +1040,6 @@ static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s, } out_free: kfree(iovec); -out_fput: - /* Hold on to the file for -EAGAIN */ - if (unlikely(ret && ret != -EAGAIN)) - io_fput(req); return ret; } @@ -1072,29 +1054,19 @@ static int io_nop(struct io_kiocb *req, u64 user_data) if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - /* - * Twilight zone - it's possible that someone issued an opcode that - * has a file attached, then got -EAGAIN on submission, and changed - * the sqe before we retried it from async context. Avoid dropping - * a file reference for this malicious case, and flag the error. - */ - if (req->rw.ki_filp) { - err = -EBADF; - io_fput(req); - } io_cqring_add_event(ctx, user_data, err, 0); - io_free_req(req); + io_put_req(req); return 0; } static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; - unsigned flags; - int fd; - /* Prep already done */ - if (req->rw.ki_filp) + if (!req->file) + return -EBADF; + /* Prep already done (EAGAIN retry) */ + if (req->flags & REQ_F_PREPPED) return 0; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) @@ -1102,20 +1074,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) return -EINVAL; - fd = READ_ONCE(sqe->fd); - flags = READ_ONCE(sqe->flags); - - if (flags & IOSQE_FIXED_FILE) { - if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files)) - return -EBADF; - req->rw.ki_filp = ctx->user_files[fd]; - req->flags |= REQ_F_FIXED_FILE; - } else { - req->rw.ki_filp = fget(fd); - if (unlikely(!req->rw.ki_filp)) - return -EBADF; - } - + req->flags |= REQ_F_PREPPED; return 0; } @@ -1144,9 +1103,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, end > 0 ? end : LLONG_MAX, fsync_flags & IORING_FSYNC_DATASYNC); - io_fput(req); io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); - io_free_req(req); + io_put_req(req); return 0; } @@ -1204,15 +1162,16 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe) spin_unlock_irq(&ctx->completion_lock); io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); - io_free_req(req); + io_put_req(req); return 0; } -static void io_poll_complete(struct io_kiocb *req, __poll_t mask) +static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req, + __poll_t mask) { - io_cqring_add_event(req->ctx, req->user_data, mangle_poll(mask), 0); - io_fput(req); - io_free_req(req); + req->poll.done = true; + io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0); + io_commit_cqring(ctx); } static void io_poll_complete_work(struct work_struct *work) @@ -1240,9 +1199,11 @@ static void io_poll_complete_work(struct work_struct *work) return; } list_del_init(&req->list); + io_poll_complete(ctx, req, mask); spin_unlock_irq(&ctx->completion_lock); - io_poll_complete(req, mask); + io_cqring_ev_posted(ctx); + io_put_req(req); } static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, @@ -1253,29 +1214,25 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, struct io_kiocb *req = container_of(poll, struct io_kiocb, poll); struct io_ring_ctx *ctx = req->ctx; __poll_t mask = key_to_poll(key); - - poll->woken = true; + unsigned long flags; /* for instances that support it check for an event match first: */ - if (mask) { - unsigned long flags; + if (mask && !(mask & poll->events)) + return 0; - if (!(mask & poll->events)) - return 0; + list_del_init(&poll->wait.entry); - /* try to complete the iocb inline if we can: */ - if (spin_trylock_irqsave(&ctx->completion_lock, flags)) { - list_del(&req->list); - spin_unlock_irqrestore(&ctx->completion_lock, flags); + if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) { + list_del(&req->list); + io_poll_complete(ctx, req, mask); + spin_unlock_irqrestore(&ctx->completion_lock, flags); - list_del_init(&poll->wait.entry); - io_poll_complete(req, mask); - return 1; - } + io_cqring_ev_posted(ctx); + io_put_req(req); + } else { + queue_work(ctx->sqo_wq, &req->work); } - list_del_init(&poll->wait.entry); - queue_work(ctx->sqo_wq, &req->work); return 1; } @@ -1305,36 +1262,23 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_poll_iocb *poll = &req->poll; struct io_ring_ctx *ctx = req->ctx; struct io_poll_table ipt; - unsigned flags; + bool cancel = false; __poll_t mask; u16 events; - int fd; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index) return -EINVAL; + if (!poll->file) + return -EBADF; INIT_WORK(&req->work, io_poll_complete_work); events = READ_ONCE(sqe->poll_events); poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP; - flags = READ_ONCE(sqe->flags); - fd = READ_ONCE(sqe->fd); - - if (flags & IOSQE_FIXED_FILE) { - if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files)) - return -EBADF; - poll->file = ctx->user_files[fd]; - req->flags |= REQ_F_FIXED_FILE; - } else { - poll->file = fget(fd); - } - if (unlikely(!poll->file)) - return -EBADF; - poll->head = NULL; - poll->woken = false; + poll->done = false; poll->canceled = false; ipt.pt._qproc = io_poll_queue_proc; @@ -1346,56 +1290,44 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) INIT_LIST_HEAD(&poll->wait.entry); init_waitqueue_func_entry(&poll->wait, io_poll_wake); - /* one for removal from waitqueue, one for this function */ - refcount_set(&req->refs, 2); - mask = vfs_poll(poll->file, &ipt.pt) & poll->events; - if (unlikely(!poll->head)) { - /* we did not manage to set up a waitqueue, done */ - goto out; - } spin_lock_irq(&ctx->completion_lock); - spin_lock(&poll->head->lock); - if (poll->woken) { - /* wake_up context handles the rest */ - mask = 0; + if (likely(poll->head)) { + spin_lock(&poll->head->lock); + if (unlikely(list_empty(&poll->wait.entry))) { + if (ipt.error) + cancel = true; + ipt.error = 0; + mask = 0; + } + if (mask || ipt.error) + list_del_init(&poll->wait.entry); + else if (cancel) + WRITE_ONCE(poll->canceled, true); + else if (!poll->done) /* actually waiting for an event */ + list_add_tail(&req->list, &ctx->cancel_list); + spin_unlock(&poll->head->lock); + } + if (mask) { /* no async, we'd stolen it */ + req->error = mangle_poll(mask); ipt.error = 0; - } else if (mask || ipt.error) { - /* if we get an error or a mask we are done */ - WARN_ON_ONCE(list_empty(&poll->wait.entry)); - list_del_init(&poll->wait.entry); - } else { - /* actually waiting for an event */ - list_add_tail(&req->list, &ctx->cancel_list); + io_poll_complete(ctx, req, mask); } - spin_unlock(&poll->head->lock); spin_unlock_irq(&ctx->completion_lock); -out: - if (unlikely(ipt.error)) { - if (!(flags & IOSQE_FIXED_FILE)) - fput(poll->file); - /* - * Drop one of our refs to this req, __io_submit_sqe() will - * drop the other one since we're returning an error. - */ - io_free_req(req); - return ipt.error; + if (mask) { + io_cqring_ev_posted(ctx); + io_put_req(req); } - - if (mask) - io_poll_complete(req, mask); - io_free_req(req); - return 0; + return ipt.error; } static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, const struct sqe_submit *s, bool force_nonblock, struct io_submit_state *state) { - ssize_t ret; - int opcode; + int ret, opcode; if (unlikely(s->index >= ctx->sq_entries)) return -EINVAL; @@ -1524,10 +1456,13 @@ restart: break; cond_resched(); } while (1); + + /* drop submission reference */ + io_put_req(req); } if (ret) { io_cqring_add_event(ctx, sqe->user_data, ret, 0); - io_free_req(req); + io_put_req(req); } /* async context always use a copy of the sqe */ @@ -1614,11 +1549,55 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req) return ret; } +static bool io_op_needs_file(const struct io_uring_sqe *sqe) +{ + int op = READ_ONCE(sqe->opcode); + + switch (op) { + case IORING_OP_NOP: + case IORING_OP_POLL_REMOVE: + return false; + default: + return true; + } +} + +static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, + struct io_submit_state *state, struct io_kiocb *req) +{ + unsigned flags; + int fd; + + flags = READ_ONCE(s->sqe->flags); + fd = READ_ONCE(s->sqe->fd); + + if (!io_op_needs_file(s->sqe)) { + req->file = NULL; + return 0; + } + + if (flags & IOSQE_FIXED_FILE) { + if (unlikely(!ctx->user_files || + (unsigned) fd >= ctx->nr_user_files)) + return -EBADF; + req->file = ctx->user_files[fd]; + req->flags |= REQ_F_FIXED_FILE; + } else { + if (s->needs_fixed_file) + return -EBADF; + req->file = io_file_get(state, fd); + if (unlikely(!req->file)) + return -EBADF; + } + + return 0; +} + static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, struct io_submit_state *state) { struct io_kiocb *req; - ssize_t ret; + int ret; /* enforce forwards compatibility on users */ if (unlikely(s->sqe->flags & ~IOSQE_FIXED_FILE)) @@ -1628,7 +1607,9 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, if (unlikely(!req)) return -EAGAIN; - req->rw.ki_filp = NULL; + ret = io_req_set_file(ctx, s, state, req); + if (unlikely(ret)) + goto out; ret = __io_submit_sqe(ctx, req, s, true, state); if (ret == -EAGAIN) { @@ -1649,11 +1630,23 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, INIT_WORK(&req->work, io_sq_wq_submit_work); queue_work(ctx->sqo_wq, &req->work); } - ret = 0; + + /* + * Queued up for async execution, worker will release + * submit reference when the iocb is actually + * submitted. + */ + return 0; } } + +out: + /* drop submission reference */ + io_put_req(req); + + /* and drop final reference, if we failed */ if (ret) - io_free_req(req); + io_put_req(req); return ret; } diff --git a/fs/iomap.c b/fs/iomap.c index 97cb9d486a7d..abdd18e404f8 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1589,12 +1589,14 @@ static void iomap_dio_bio_end_io(struct bio *bio) if (should_dirty) { bio_check_pages_dirty(bio); } else { - struct bio_vec *bvec; - int i; - struct bvec_iter_all iter_all; + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { + struct bvec_iter_all iter_all; + struct bio_vec *bvec; + int i; - bio_for_each_segment_all(bvec, bio, i, iter_all) - put_page(bvec->bv_page); + bio_for_each_segment_all(bvec, bio, i, iter_all) + put_page(bvec->bv_page); + } bio_put(bio); } } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 56992b32c6bb..a90bb19dcfa2 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -208,6 +208,7 @@ static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) { struct fanotify_event_info_fid info = { }; struct file_handle handle = { }; + unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh; size_t fh_len = event->fh_len; size_t len = fanotify_event_info_len(event); @@ -233,7 +234,16 @@ static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) buf += sizeof(handle); len -= sizeof(handle); - if (copy_to_user(buf, fanotify_event_fh(event), fh_len)) + /* + * For an inline fh, copy through stack to exclude the copy from + * usercopy hardening protections. + */ + fh = fanotify_event_fh(event); + if (fh_len <= FANOTIFY_INLINE_FH_LEN) { + memcpy(bounce, fh, fh_len); + fh = bounce; + } + if (copy_to_user(buf, fh, fh_len)) return -EFAULT; /* Pad with 0's */ diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index e2901fbb9f76..7b53598c8804 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -519,8 +519,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; - else if (create) - return -EEXIST; + else if (create) { + ret = -EEXIST; + goto out; + } i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); @@ -548,6 +550,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* return the wd */ ret = i_mark->wd; +out: /* match the get from fsnotify_find_mark() */ fsnotify_put_mark(fsn_mark); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index ae796e10f68b..e7276932e433 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1242,8 +1242,10 @@ set_size: truncate_setsize(inode, newsize); down_write(&iinfo->i_data_sem); udf_clear_extent_cache(inode); - udf_truncate_extents(inode); + err = udf_truncate_extents(inode); up_write(&iinfo->i_data_sem); + if (err) + return err; } update_time: inode->i_mtime = inode->i_ctime = current_time(inode); diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index b647f0bd150c..63a47f1e1d52 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -199,7 +199,7 @@ static void udf_update_alloc_ext_desc(struct inode *inode, * for making file shorter. For making file longer, udf_extend_file() has to * be used. */ -void udf_truncate_extents(struct inode *inode) +int udf_truncate_extents(struct inode *inode) { struct extent_position epos; struct kernel_lb_addr eloc, neloc = {}; @@ -224,7 +224,7 @@ void udf_truncate_extents(struct inode *inode) if (etype == -1) { /* We should extend the file? */ WARN_ON(byte_offset); - return; + return 0; } epos.offset -= adsize; extent_trunc(inode, &epos, &eloc, etype, elen, byte_offset); @@ -260,6 +260,9 @@ void udf_truncate_extents(struct inode *inode) epos.block = eloc; epos.bh = udf_tread(sb, udf_get_lb_pblock(sb, &eloc, 0)); + /* Error reading indirect block? */ + if (!epos.bh) + return -EIO; if (elen) indirect_ext_len = (elen + sb->s_blocksize - 1) >> @@ -283,4 +286,5 @@ void udf_truncate_extents(struct inode *inode) iinfo->i_lenExtents = inode->i_size; brelse(epos.bh); + return 0; } diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index ee246769dee4..d89ef71887fc 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -235,7 +235,7 @@ extern struct inode *udf_new_inode(struct inode *, umode_t); /* truncate.c */ extern void udf_truncate_tail_extent(struct inode *); extern void udf_discard_prealloc(struct inode *); -extern void udf_truncate_extents(struct inode *); +extern int udf_truncate_extents(struct inode *); /* balloc.c */ extern void udf_free_blocks(struct super_block *, struct inode *, |