aboutsummaryrefslogtreecommitdiffstats
path: root/io_uring/net.c
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring/net.c')
-rw-r--r--io_uring/net.c308
1 files changed, 222 insertions, 86 deletions
diff --git a/io_uring/net.c b/io_uring/net.c
index 60e392f7f2dc..caa6a803cb72 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -55,21 +55,14 @@ struct io_sr_msg {
struct user_msghdr __user *umsg;
void __user *buf;
};
+ unsigned len;
+ unsigned done_io;
unsigned msg_flags;
- unsigned flags;
- size_t len;
- size_t done_io;
-};
-
-struct io_sendzc {
- struct file *file;
- void __user *buf;
- size_t len;
- unsigned msg_flags;
- unsigned flags;
- unsigned addr_len;
+ u16 flags;
+ /* initialised and used only by !msg send variants */
+ u16 addr_len;
void __user *addr;
- size_t done_io;
+ /* used only for send zerocopy */
struct io_kiocb *notif;
};
@@ -126,28 +119,36 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
}
}
-static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req,
- unsigned int issue_flags)
+static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
+ unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_cache_entry *entry;
+ struct io_async_msghdr *hdr;
if (!(issue_flags & IO_URING_F_UNLOCKED) &&
(entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) {
- struct io_async_msghdr *hdr;
-
hdr = container_of(entry, struct io_async_msghdr, cache);
+ hdr->free_iov = NULL;
req->flags |= REQ_F_ASYNC_DATA;
req->async_data = hdr;
return hdr;
}
- if (!io_alloc_async_data(req))
- return req->async_data;
-
+ if (!io_alloc_async_data(req)) {
+ hdr = req->async_data;
+ hdr->free_iov = NULL;
+ return hdr;
+ }
return NULL;
}
+static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
+{
+ /* ->prep_async is always called from the submission context */
+ return io_msg_alloc_async(req, 0);
+}
+
static int io_setup_async_msg(struct io_kiocb *req,
struct io_async_msghdr *kmsg,
unsigned int issue_flags)
@@ -156,17 +157,20 @@ static int io_setup_async_msg(struct io_kiocb *req,
if (req_has_async_data(req))
return -EAGAIN;
- async_msg = io_recvmsg_alloc_async(req, issue_flags);
+ async_msg = io_msg_alloc_async(req, issue_flags);
if (!async_msg) {
kfree(kmsg->free_iov);
return -ENOMEM;
}
req->flags |= REQ_F_NEED_CLEANUP;
memcpy(async_msg, kmsg, sizeof(*kmsg));
- async_msg->msg.msg_name = &async_msg->addr;
+ if (async_msg->msg.msg_name)
+ async_msg->msg.msg_name = &async_msg->addr;
/* if were using fast_iov, set it to the new one */
- if (!async_msg->free_iov)
- async_msg->msg.msg_iter.iov = async_msg->fast_iov;
+ if (!kmsg->free_iov) {
+ size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov;
+ async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx];
+ }
return -EAGAIN;
}
@@ -182,34 +186,34 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
&iomsg->free_iov);
}
-int io_sendzc_prep_async(struct io_kiocb *req)
+int io_send_prep_async(struct io_kiocb *req)
{
- struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
+ struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_async_msghdr *io;
int ret;
if (!zc->addr || req_has_async_data(req))
return 0;
- if (io_alloc_async_data(req))
+ io = io_msg_alloc_async_prep(req);
+ if (!io)
return -ENOMEM;
-
- io = req->async_data;
ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
return ret;
}
static int io_setup_async_addr(struct io_kiocb *req,
- struct sockaddr_storage *addr,
+ struct sockaddr_storage *addr_storage,
unsigned int issue_flags)
{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_async_msghdr *io;
- if (!addr || req_has_async_data(req))
+ if (!sr->addr || req_has_async_data(req))
return -EAGAIN;
- if (io_alloc_async_data(req))
+ io = io_msg_alloc_async(req, issue_flags);
+ if (!io)
return -ENOMEM;
- io = req->async_data;
- memcpy(&io->addr, addr, sizeof(io->addr));
+ memcpy(&io->addr, addr_storage, sizeof(io->addr));
return -EAGAIN;
}
@@ -217,6 +221,8 @@ int io_sendmsg_prep_async(struct io_kiocb *req)
{
int ret;
+ if (!io_msg_alloc_async_prep(req))
+ return -ENOMEM;
ret = io_sendmsg_copy_hdr(req, req->async_data);
if (!ret)
req->flags |= REQ_F_NEED_CLEANUP;
@@ -234,8 +240,14 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
- if (unlikely(sqe->file_index || sqe->addr2))
+ if (req->opcode == IORING_OP_SEND) {
+ if (READ_ONCE(sqe->__pad3[0]))
+ return -EINVAL;
+ sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ sr->addr_len = READ_ONCE(sqe->addr_len);
+ } else if (sqe->addr2 || sqe->file_index) {
return -EINVAL;
+ }
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
@@ -291,13 +303,13 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
if (ret < min_ret) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
return io_setup_async_msg(req, kmsg, issue_flags);
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return io_setup_async_msg(req, kmsg, issue_flags);
}
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
req_set_fail(req);
}
/* fast path, check for non-NULL to avoid function call */
@@ -315,6 +327,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
+ struct sockaddr_storage __address;
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct msghdr msg;
struct iovec iov;
@@ -323,9 +336,29 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
int min_ret = 0;
int ret;
+ msg.msg_name = NULL;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_namelen = 0;
+ msg.msg_ubuf = NULL;
+
+ if (sr->addr) {
+ if (req_has_async_data(req)) {
+ struct io_async_msghdr *io = req->async_data;
+
+ msg.msg_name = &io->addr;
+ } else {
+ ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
+ if (unlikely(ret < 0))
+ return ret;
+ msg.msg_name = (struct sockaddr *)&__address;
+ }
+ msg.msg_namelen = sr->addr_len;
+ }
+
if (!(req->flags & REQ_F_POLLED) &&
(sr->flags & IORING_RECVSEND_POLL_FIRST))
- return -EAGAIN;
+ return io_setup_async_addr(req, &__address, issue_flags);
sock = sock_from_file(req->file);
if (unlikely(!sock))
@@ -335,12 +368,6 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(ret))
return ret;
- msg.msg_name = NULL;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_namelen = 0;
- msg.msg_ubuf = NULL;
-
flags = sr->msg_flags;
if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
@@ -351,16 +378,17 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
ret = sock_sendmsg(sock, &msg);
if (ret < min_ret) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
- return -EAGAIN;
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
+ return io_setup_async_addr(req, &__address, issue_flags);
+
if (ret > 0 && io_net_retry(sock, flags)) {
sr->len -= ret;
sr->buf += ret;
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
- return -EAGAIN;
+ return io_setup_async_addr(req, &__address, issue_flags);
}
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
req_set_fail(req);
}
if (ret >= 0)
@@ -454,7 +482,6 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
if (msg.msg_iovlen == 0) {
sr->len = 0;
- iomsg->free_iov = NULL;
} else if (msg.msg_iovlen > 1) {
return -EINVAL;
} else {
@@ -465,7 +492,6 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
if (clen < 0)
return -EINVAL;
sr->len = clen;
- iomsg->free_iov = NULL;
}
if (req->flags & REQ_F_APOLL_MULTISHOT) {
@@ -504,6 +530,8 @@ int io_recvmsg_prep_async(struct io_kiocb *req)
{
int ret;
+ if (!io_msg_alloc_async_prep(req))
+ return -ENOMEM;
ret = io_recvmsg_copy_hdr(req, req->async_data);
if (!ret)
req->flags |= REQ_F_NEED_CLEANUP;
@@ -751,13 +779,13 @@ retry_multishot:
}
return ret;
}
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return io_setup_async_msg(req, kmsg, issue_flags);
}
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
req_set_fail(req);
@@ -847,8 +875,6 @@ retry_multishot:
return -EAGAIN;
}
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
if (ret > 0 && io_net_retry(sock, flags)) {
sr->len -= ret;
sr->buf += ret;
@@ -856,6 +882,8 @@ retry_multishot:
req->flags |= REQ_F_PARTIAL_IO;
return -EAGAIN;
}
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
@@ -879,23 +907,30 @@ out_free:
return ret;
}
-void io_sendzc_cleanup(struct io_kiocb *req)
+void io_send_zc_cleanup(struct io_kiocb *req)
{
- struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
+ struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr *io;
- zc->notif->flags |= REQ_F_CQE_SKIP;
- io_notif_flush(zc->notif);
- zc->notif = NULL;
+ if (req_has_async_data(req)) {
+ io = req->async_data;
+ /* might be ->fast_iov if *msg_copy_hdr failed */
+ if (io->free_iov != io->fast_iov)
+ kfree(io->free_iov);
+ }
+ if (zc->notif) {
+ io_notif_flush(zc->notif);
+ zc->notif = NULL;
+ }
}
-int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
+ struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *notif;
- if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) ||
- READ_ONCE(sqe->__pad3[0]))
+ if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
return -EINVAL;
/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
if (req->flags & REQ_F_CQE_SKIP)
@@ -922,14 +957,24 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
io_req_set_rsrc_node(notif, ctx, 0);
}
+ if (req->opcode == IORING_OP_SEND_ZC) {
+ if (READ_ONCE(sqe->__pad3[0]))
+ return -EINVAL;
+ zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ zc->addr_len = READ_ONCE(sqe->addr_len);
+ } else {
+ if (unlikely(sqe->addr2 || sqe->file_index))
+ return -EINVAL;
+ if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
+ return -EINVAL;
+ }
+
zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
zc->len = READ_ONCE(sqe->len);
zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
if (zc->msg_flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
- zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
- zc->addr_len = READ_ONCE(sqe->addr_len);
zc->done_io = 0;
#ifdef CONFIG_COMPAT
@@ -939,6 +984,13 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
+static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
+ struct iov_iter *from, size_t length)
+{
+ skb_zcopy_downgrade_managed(skb);
+ return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
+}
+
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
struct iov_iter *from, size_t length)
{
@@ -949,13 +1001,10 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
ssize_t copied = 0;
unsigned long truesize = 0;
- if (!shinfo->nr_frags)
+ if (!frag)
shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
-
- if (!skb_zcopy_managed(skb) || !iov_iter_is_bvec(from)) {
- skb_zcopy_downgrade_managed(skb);
+ else if (unlikely(!skb_zcopy_managed(skb)))
return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
- }
bi.bi_size = min(from->count, length);
bi.bi_bvec_done = from->iov_offset;
@@ -993,14 +1042,14 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
return ret;
}
-int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
+int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
- struct sockaddr_storage __address, *addr = NULL;
- struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
+ struct sockaddr_storage __address;
+ struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
struct msghdr msg;
struct iovec iov;
struct socket *sock;
- unsigned msg_flags, cflags;
+ unsigned msg_flags;
int ret, min_ret = 0;
sock = sock_from_file(req->file);
@@ -1016,26 +1065,26 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
if (req_has_async_data(req)) {
struct io_async_msghdr *io = req->async_data;
- msg.msg_name = addr = &io->addr;
+ msg.msg_name = &io->addr;
} else {
ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
if (unlikely(ret < 0))
return ret;
msg.msg_name = (struct sockaddr *)&__address;
- addr = &__address;
}
msg.msg_namelen = zc->addr_len;
}
if (!(req->flags & REQ_F_POLLED) &&
(zc->flags & IORING_RECVSEND_POLL_FIRST))
- return io_setup_async_addr(req, addr, issue_flags);
+ return io_setup_async_addr(req, &__address, issue_flags);
if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
(u64)(uintptr_t)zc->buf, zc->len);
if (unlikely(ret))
return ret;
+ msg.sg_from_iter = io_sg_from_iter;
} else {
ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
&msg.msg_iter);
@@ -1044,6 +1093,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
ret = io_notif_account_mem(zc->notif, zc->len);
if (unlikely(ret))
return ret;
+ msg.sg_from_iter = io_sg_from_iter_iovec;
}
msg_flags = zc->msg_flags | MSG_ZEROCOPY;
@@ -1054,22 +1104,19 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
msg.msg_flags = msg_flags;
msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
- msg.sg_from_iter = io_sg_from_iter;
ret = sock_sendmsg(sock, &msg);
if (unlikely(ret < min_ret)) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
- return io_setup_async_addr(req, addr, issue_flags);
+ return io_setup_async_addr(req, &__address, issue_flags);
if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
zc->len -= ret;
zc->buf += ret;
zc->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
- return io_setup_async_addr(req, addr, issue_flags);
+ return io_setup_async_addr(req, &__address, issue_flags);
}
- if (ret < 0 && !zc->done_io)
- zc->notif->flags |= REQ_F_CQE_SKIP;
if (ret == -ERESTARTSYS)
ret = -EINTR;
req_set_fail(req);
@@ -1080,13 +1127,102 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
else if (zc->done_io)
ret = zc->done_io;
- io_notif_flush(zc->notif);
- req->flags &= ~REQ_F_NEED_CLEANUP;
- cflags = ret >= 0 ? IORING_CQE_F_MORE : 0;
- io_req_set_res(req, ret, cflags);
+ /*
+ * If we're in io-wq we can't rely on tw ordering guarantees, defer
+ * flushing notif to io_send_zc_cleanup()
+ */
+ if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+ io_notif_flush(zc->notif);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ }
+ io_req_set_res(req, ret, IORING_CQE_F_MORE);
return IOU_OK;
}
+int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr iomsg, *kmsg;
+ struct socket *sock;
+ unsigned flags;
+ int ret, min_ret = 0;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ if (req_has_async_data(req)) {
+ kmsg = req->async_data;
+ } else {
+ ret = io_sendmsg_copy_hdr(req, &iomsg);
+ if (ret)
+ return ret;
+ kmsg = &iomsg;
+ }
+
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return io_setup_async_msg(req, kmsg, issue_flags);
+
+ flags = sr->msg_flags | MSG_ZEROCOPY;
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
+ kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
+ kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
+ ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
+
+ if (unlikely(ret < min_ret)) {
+ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+ return io_setup_async_msg(req, kmsg, issue_flags);
+
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->done_io += ret;
+ req->flags |= REQ_F_PARTIAL_IO;
+ return io_setup_async_msg(req, kmsg, issue_flags);
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ req_set_fail(req);
+ }
+ /* fast path, check for non-NULL to avoid function call */
+ if (kmsg->free_iov) {
+ kfree(kmsg->free_iov);
+ kmsg->free_iov = NULL;
+ }
+
+ io_netmsg_recycle(req, issue_flags);
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
+
+ /*
+ * If we're in io-wq we can't rely on tw ordering guarantees, defer
+ * flushing notif to io_send_zc_cleanup()
+ */
+ if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+ io_notif_flush(sr->notif);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ }
+ io_req_set_res(req, ret, IORING_CQE_F_MORE);
+ return IOU_OK;
+}
+
+void io_sendrecv_fail(struct io_kiocb *req)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+
+ if (req->flags & REQ_F_PARTIAL_IO)
+ req->cqe.res = sr->done_io;
+
+ if ((req->flags & REQ_F_NEED_CLEANUP) &&
+ (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
+ req->cqe.flags |= IORING_CQE_F_MORE;
+}
+
int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);