From 577e4432f3ac810049cb7e6b71f4d96ec7c6e894 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Jan 2024 10:33:17 +0000 Subject: tcp: add sanity checks to rx zerocopy TCP rx zerocopy intent is to map pages initially allocated from NIC drivers, not pages owned by a fs. This patch adds to can_map_frag() these additional checks: - Page must not be a compound one. - page->mapping must be NULL. This fixes the panic reported by ZhangPeng. syzbot was able to loopback packets built with sendfile(), mapping pages owned by an ext4 file to TCP rx zerocopy. r3 = socket$inet_tcp(0x2, 0x1, 0x0) mmap(&(0x7f0000ff9000/0x4000)=nil, 0x4000, 0x0, 0x12, r3, 0x0) r4 = socket$inet_tcp(0x2, 0x1, 0x0) bind$inet(r4, &(0x7f0000000000)={0x2, 0x4e24, @multicast1}, 0x10) connect$inet(r4, &(0x7f00000006c0)={0x2, 0x4e24, @empty}, 0x10) r5 = openat$dir(0xffffffffffffff9c, &(0x7f00000000c0)='./file0\x00', 0x181e42, 0x0) fallocate(r5, 0x0, 0x0, 0x85b8) sendfile(r4, r5, 0x0, 0x8ba0) getsockopt$inet_tcp_TCP_ZEROCOPY_RECEIVE(r4, 0x6, 0x23, &(0x7f00000001c0)={&(0x7f0000ffb000/0x3000)=nil, 0x3000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, &(0x7f0000000440)=0x40) r6 = openat$dir(0xffffffffffffff9c, &(0x7f00000000c0)='./file0\x00', 0x181e42, 0x0) Fixes: 93ab6cc69162 ("tcp: implement mmap() for zero copy receive") Link: https://lore.kernel.org/netdev/5106a58e-04da-372a-b836-9d3d0bd2507b@huawei.com/T/ Reported-and-bisected-by: ZhangPeng Signed-off-by: Eric Dumazet Cc: Arjun Roy Cc: Matthew Wilcox Cc: linux-mm@vger.kernel.org Cc: Andrew Morton Cc: linux-fsdevel@vger.kernel.org Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1c6de385cce..7e2481b9eae1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1786,7 +1786,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, static bool can_map_frag(const skb_frag_t *frag) { - return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); + struct page *page; + + if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag)) + return false; + + page = skb_frag_page(frag); + + if (PageCompound(page) || page->mapping) + return false; + + return true; } static int find_next_mappable_frag(const skb_frag_t *frag, -- cgit v1.2.3-59-g8ed1b From 119ff04864a24470b1e531bb53e5c141aa8fefb0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:21 +0000 Subject: tcp: move tp->scaling_ratio to tcp_sock_read_txrx group tp->scaling_ratio is a read mostly field, used in rx and tx fast paths. Fixes: d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Wei Wang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/net_cachelines/tcp_sock.rst | 2 +- include/linux/tcp.h | 2 +- net/ipv4/tcp.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index 97d7a5c8e01c..803912291479 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -38,7 +38,7 @@ u32 max_window read_mostly - u32 mss_cache read_mostly read_mostly tcp_rate_check_app_limited,tcp_current_mss,tcp_sync_mss,tcp_sndbuf_expand,tcp_tso_should_defer(tx);tcp_update_pacing_rate,tcp_clean_rtx_queue(rx) u32 window_clamp read_mostly read_write tcp_rcv_space_adjust,__tcp_select_window u32 rcv_ssthresh read_mostly - __tcp_select_window -u82 scaling_ratio +u8 scaling_ratio read_mostly read_mostly tcp_win_from_space struct tcp_rack u16 advmss - read_mostly tcp_rcv_space_adjust u8 compressed_ack diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 89b290d8c8dc..168f5dca6609 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -221,6 +221,7 @@ struct tcp_sock { u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ u16 tcp_header_len; /* Bytes of tcp header to send */ + u8 scaling_ratio; /* see tcp_win_from_space() */ u8 chrono_type : 2, /* current chronograph type */ repair : 1, is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ @@ -352,7 +353,6 @@ struct tcp_sock { u32 compressed_ack_rcv_nxt; struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - u8 scaling_ratio; /* see tcp_win_from_space() */ /* Information of the most recently (s)acked skb */ struct tcp_rack { u64 mstamp; /* (Re)sent time of the skb */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7e2481b9eae1..c82dc42f57c6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4615,7 +4615,8 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out); - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32); /* RX read-mostly hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq); -- cgit v1.2.3-59-g8ed1b