aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--fs/io_uring.c42
-rw-r--r--include/uapi/linux/io_uring.h4
2 files changed, 37 insertions, 9 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2a89928ac1ba..7d3589e3a277 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -106,7 +106,8 @@
#define IORING_MAX_REG_BUFFERS (1U << 14)
#define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
- IOSQE_IO_HARDLINK | IOSQE_ASYNC)
+ IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
+ IOSQE_CQE_SKIP_SUCCESS)
#define SQE_VALID_FLAGS (SQE_COMMON_FLAGS|IOSQE_BUFFER_SELECT|IOSQE_IO_DRAIN)
@@ -721,6 +722,7 @@ enum {
REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT,
REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT,
REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT,
+ REQ_F_CQE_SKIP_BIT = IOSQE_CQE_SKIP_SUCCESS_BIT,
/* first byte is taken by user flags, shift it to not overlap */
REQ_F_FAIL_BIT = 8,
@@ -737,6 +739,7 @@ enum {
REQ_F_REFCOUNT_BIT,
REQ_F_ARM_LTIMEOUT_BIT,
REQ_F_ASYNC_DATA_BIT,
+ REQ_F_SKIP_LINK_CQES_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT,
@@ -758,6 +761,8 @@ enum {
REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT),
/* IOSQE_BUFFER_SELECT */
REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT),
+ /* IOSQE_CQE_SKIP_SUCCESS */
+ REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT),
/* fail rest of links */
REQ_F_FAIL = BIT(REQ_F_FAIL_BIT),
@@ -791,6 +796,8 @@ enum {
REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT),
/* ->async_data allocated */
REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT),
+ /* don't post CQEs while failing linked requests */
+ REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT),
};
struct async_poll {
@@ -1301,6 +1308,10 @@ static inline bool req_has_async_data(struct io_kiocb *req)
static inline void req_set_fail(struct io_kiocb *req)
{
req->flags |= REQ_F_FAIL;
+ if (req->flags & REQ_F_CQE_SKIP) {
+ req->flags &= ~REQ_F_CQE_SKIP;
+ req->flags |= REQ_F_SKIP_LINK_CQES;
+ }
}
static inline void req_fail_link_node(struct io_kiocb *req, int res)
@@ -1843,7 +1854,8 @@ static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
{
- __io_fill_cqe(req->ctx, req->user_data, res, cflags);
+ if (!(req->flags & REQ_F_CQE_SKIP))
+ __io_fill_cqe(req->ctx, req->user_data, res, cflags);
}
static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
@@ -1859,7 +1871,8 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res,
struct io_ring_ctx *ctx = req->ctx;
spin_lock(&ctx->completion_lock);
- __io_fill_cqe(ctx, req->user_data, res, cflags);
+ if (!(req->flags & REQ_F_CQE_SKIP))
+ __io_fill_cqe(ctx, req->user_data, res, cflags);
/*
* If we're the last reference to this request, add to our locked
* free_list cache.
@@ -2067,6 +2080,7 @@ static bool io_kill_linked_timeout(struct io_kiocb *req)
link->timeout.head = NULL;
if (hrtimer_try_to_cancel(&io->timer) != -1) {
list_del(&link->timeout.list);
+ /* leave REQ_F_CQE_SKIP to io_fill_cqe_req */
io_fill_cqe_req(link, -ECANCELED, 0);
io_put_req_deferred(link);
return true;
@@ -2079,6 +2093,7 @@ static void io_fail_links(struct io_kiocb *req)
__must_hold(&req->ctx->completion_lock)
{
struct io_kiocb *nxt, *link = req->link;
+ bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;
req->link = NULL;
while (link) {
@@ -2091,7 +2106,10 @@ static void io_fail_links(struct io_kiocb *req)
link->link = NULL;
trace_io_uring_fail_link(req, link);
- io_fill_cqe_req(link, res, 0);
+ if (!ignore_cqes) {
+ link->flags &= ~REQ_F_CQE_SKIP;
+ io_fill_cqe_req(link, res, 0);
+ }
io_put_req_deferred(link);
link = nxt;
}
@@ -2108,6 +2126,7 @@ static bool io_disarm_next(struct io_kiocb *req)
req->flags &= ~REQ_F_ARM_LTIMEOUT;
if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
io_remove_next_linked(req);
+ /* leave REQ_F_CQE_SKIP to io_fill_cqe_req */
io_fill_cqe_req(link, -ECANCELED, 0);
io_put_req_deferred(link);
posted = true;
@@ -2372,8 +2391,9 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
- __io_fill_cqe(ctx, req->user_data, req->result,
- req->cflags);
+ if (!(req->flags & REQ_F_CQE_SKIP))
+ __io_fill_cqe(ctx, req->user_data, req->result,
+ req->cflags);
}
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
@@ -2503,12 +2523,14 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
prev = start;
wq_list_for_each_resume(pos, prev) {
struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
+ u32 cflags;
/* order with io_complete_rw_iopoll(), e.g. ->result updates */
if (!smp_load_acquire(&req->iopoll_completed))
break;
- __io_fill_cqe(ctx, req->user_data, req->result,
- io_put_rw_kbuf(req));
+ cflags = io_put_rw_kbuf(req);
+ if (!(req->flags & REQ_F_CQE_SKIP))
+ __io_fill_cqe(ctx, req->user_data, req->result, cflags);
nr_events++;
}
@@ -5832,6 +5854,8 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
flags = READ_ONCE(sqe->len);
if (flags & ~IORING_POLL_ADD_MULTI)
return -EINVAL;
+ if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
+ return -EINVAL;
io_req_set_refcount(req);
poll->events = io_poll_parse_events(sqe, flags);
@@ -10442,7 +10466,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS |
- IORING_FEAT_RSRC_TAGS;
+ IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP;
if (copy_to_user(params, p, sizeof(*p))) {
ret = -EFAULT;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index c45b5e9a9387..787f491f0d2a 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -70,6 +70,7 @@ enum {
IOSQE_IO_HARDLINK_BIT,
IOSQE_ASYNC_BIT,
IOSQE_BUFFER_SELECT_BIT,
+ IOSQE_CQE_SKIP_SUCCESS_BIT,
};
/*
@@ -87,6 +88,8 @@ enum {
#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)
/* select buffer from sqe->buf_group */
#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT)
+/* don't post CQE if request succeeded */
+#define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT)
/*
* io_uring_setup() flags
@@ -289,6 +292,7 @@ struct io_uring_params {
#define IORING_FEAT_EXT_ARG (1U << 8)
#define IORING_FEAT_NATIVE_WORKERS (1U << 9)
#define IORING_FEAT_RSRC_TAGS (1U << 10)
+#define IORING_FEAT_CQE_SKIP (1U << 11)
/*
* io_uring_register(2) opcodes and arguments