aboutsummaryrefslogtreecommitdiffstats
path: root/include/uapi/linux/tcp.h
diff options
context:
space:
mode:
authorArjun Roy <arjunroy@google.com>2020-12-02 14:53:49 -0800
committerJakub Kicinski <kuba@kernel.org>2020-12-04 13:40:53 -0800
commit94ab9eb9b234ddf23af04a4bc7e8db68e67b8778 (patch)
tree83b65a5607d34540f08b5e8a73a8342c4a0acef4 /include/uapi/linux/tcp.h
parentnet-zerocopy: Set zerocopy hint when data is copied (diff)
downloadlinux-dev-94ab9eb9b234ddf23af04a4bc7e8db68e67b8778.tar.xz
linux-dev-94ab9eb9b234ddf23af04a4bc7e8db68e67b8778.zip
net-zerocopy: Defer vm zap unless actually needed.
Zapping pages is required only if we are calling vm_insert_page into a region where pages had previously been mapped. Receive zerocopy allows reusing such regions, and hitherto called zap_page_range() before calling vm_insert_page() in that range. zap_page_range() can also be triggered from userspace with madvise(MADV_DONTNEED). If userspace is configured to call this before reusing a segment, or if there was nothing mapped at this virtual address to begin with, we can avoid calling zap_page_range() under the socket lock. That said, if userspace does not do that, then we are still responsible for calling zap_page_range(). This patch adds a flag that the user can use to hint to the kernel that a zap is not required. If the flag is not set, or if an older user application does not have a flags field at all, then the kernel calls zap_page_range as before. Also, if the flag is set but a zap is still required, the kernel performs that zap as necessary. Thus incorrectly indicating that a zap can be avoided does not change the correctness of operation. It also increases the batchsize for vm_insert_pages and prefetches the page struct for the batch since we're about to bump the refcount. An alternative mechanism could be to not have a flag, assume by default a zap is not needed, and fall back to zapping if needed. However, this would harm performance for older applications for which a zap is necessary, and thus we implement it with an explicit flag so newer applications can opt in. When using RPC-style traffic with medium sized (tens of KB) RPCs, this change yields an efficency improvement of about 30% for QPS/CPU usage. Signed-off-by: Arjun Roy <arjunroy@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/uapi/linux/tcp.h')
-rw-r--r--include/uapi/linux/tcp.h2
1 files changed, 2 insertions, 0 deletions
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 62db78b9c1a0..13ceeb395eb8 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -343,6 +343,7 @@ struct tcp_diag_md5sig {
/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
+#define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1
struct tcp_zerocopy_receive {
__u64 address; /* in: address of mapping */
__u32 length; /* in/out: number of bytes to map/mapped */
@@ -351,5 +352,6 @@ struct tcp_zerocopy_receive {
__s32 err; /* out: socket error */
__u64 copybuf_address; /* in: copybuf address (small reads) */
__s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */
+ __u32 flags; /* in: flags */
};
#endif /* _UAPI_LINUX_TCP_H */