From 3d010c8031e39f5fa1e8b13ada77e0321091011f Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 26 Mar 2024 12:33:58 +0100 Subject: udp: do not accept non-tunnel GSO skbs landing in a tunnel When rx-udp-gro-forwarding is enabled UDP packets might be GROed when being forwarded. If such packets might land in a tunnel this can cause various issues and udp_gro_receive makes sure this isn't the case by looking for a matching socket. This is performed in udp4/6_gro_lookup_skb but only in the current netns. This is an issue with tunneled packets when the endpoint is in another netns. In such cases the packets will be GROed at the UDP level, which leads to various issues later on. The same thing can happen with rx-gro-list. We saw this with geneve packets being GROed at the UDP level. In such a case gso_size is set; later the packet goes through the geneve rx path, the geneve header is pulled, the offsets are adjusted and frag_list skbs are not adjusted with regard to geneve. When those skbs hit skb_segment, it will misbehave. Different outcomes are possible depending on what the GROed skbs look like; from corrupted packets to kernel crashes. One example is a BUG_ON[1] triggered in skb_segment while processing the frag_list. Because gso_size is wrong (geneve header was pulled) skb_segment thinks there is "geneve header size" of data in frag_list, although it's in fact the next packet. The BUG_ON itself has nothing to do with the issue. This is only one of the potential issues. Looking up a matching socket in udp_gro_receive is fragile: the lookup could be extended to all netns (not to mention the performance cost) but nothing prevents those packets from being modified in between and we could still not find a matching socket. It's OK to keep the current logic there as it should cover most cases but we also need to make sure we handle tunnel packets being GROed too early. 
This is done by extending the checks in udp_unexpected_gso: GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits and landing in a tunnel must be segmented. [1] kernel BUG at net/core/skbuff.c:4408! RIP: 0010:skb_segment+0xd2a/0xf70 __udp_gso_segment+0xaa/0x560 Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Fixes: 36707061d6ba ("udp: allow forwarding of plain (non-fraglisted) UDP GRO packets") Signed-off-by: Antoine Tenart Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/udp.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux/udp.h') diff --git a/include/linux/udp.h b/include/linux/udp.h index 3748e82b627b..17539d089666 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -150,6 +150,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); +#if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +#endif + +static inline bool udp_encap_needed(void) +{ + if (static_branch_unlikely(&udp_encap_needed_key)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (static_branch_unlikely(&udpv6_encap_needed_key)) + return true; +#endif + + return false; +} + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) @@ -163,6 +181,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; + /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still + * land in a tunnel as the socket check in udp_gro_receive cannot be + * foolproof. 
+ */ + if (udp_encap_needed() && + READ_ONCE(udp_sk(sk)->encap_rcv) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) + return true; + return false; } -- cgit v1.2.3-59-g8ed1b From 1382e3b6a3500c245e5278c66d210c02926f804f Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Thu, 11 Apr 2024 08:11:24 +0300 Subject: net: change maximum number of UDP segments to 128 The commit fc8b2a619469 ("net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation") adds a check of potential number of UDP segments vs UDP_MAX_SEGMENTS in linux/virtio_net.h. After this change certification test of USO guest-to-guest transmit on Windows driver for virtio-net device fails, for example with packet size of ~64K and mss of 536 bytes. In general the USO should not be more restrictive than TSO. Indeed, in case of unreasonably small mss a lot of segments can cause queue overflow and packet loss on the destination. Limit of 128 segments is good for any practical purpose, with minimal meaningful mss of 536 the maximal UDP packet will be divided to ~120 segments. The number of segments for UDP packets is validated vs UDP_MAX_SEGMENTS also in udp.c (v4,v6), this does not affect guest-to-guest path but does affect packets sent to host, for example. It is important to mention that UDP_MAX_SEGMENTS is a kernel-only define and not available to user mode socket applications. In order to request MSS smaller than MTU applications just use setsockopt with SOL_UDP and UDP_SEGMENT and there are no limitations on socket API level. Fixes: fc8b2a619469 ("net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation") Signed-off-by: Yuri Benditovich Reviewed-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/linux/udp.h | 2 +- tools/testing/selftests/net/udpgso.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/udp.h') diff --git a/include/linux/udp.h b/include/linux/udp.h index 17539d089666..e398e1dbd2d3 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -108,7 +108,7 @@ struct udp_sock { #define udp_assign_bit(nr, sk, val) \ assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) -#define UDP_MAX_SEGMENTS (1 << 6UL) +#define UDP_MAX_SEGMENTS (1 << 7UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 1d975bf52af3..85b3baa3f7f3 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -34,7 +34,7 @@ #endif #ifndef UDP_MAX_SEGMENTS -#define UDP_MAX_SEGMENTS (1 << 6UL) +#define UDP_MAX_SEGMENTS (1 << 7UL) #endif #define CONST_MTU_TEST 1500 -- cgit v1.2.3-59-g8ed1b From b3e90f375b3c7ab85aef631ebb0ad8ce66cbf3fd Mon Sep 17 00:00:00 2001 From: Yoann Congal Date: Sun, 5 May 2024 10:03:42 +0200 Subject: printk: Change type of CONFIG_BASE_SMALL to bool CONFIG_BASE_SMALL is currently a type int but is only used as a boolean. So, change its type to bool and adapt all usages: CONFIG_BASE_SMALL == 0 becomes !IS_ENABLED(CONFIG_BASE_SMALL) and CONFIG_BASE_SMALL != 0 becomes IS_ENABLED(CONFIG_BASE_SMALL). 
Reviewed-by: Petr Mladek Reviewed-by: Greg Kroah-Hartman Reviewed-by: Masahiro Yamada Signed-off-by: Yoann Congal Link: https://lore.kernel.org/r/20240505080343.1471198-3-yoann.congal@smile.fr Signed-off-by: Petr Mladek --- arch/x86/include/asm/mpspec.h | 6 +++--- drivers/tty/vt/vc_screen.c | 2 +- include/linux/threads.h | 4 ++-- include/linux/udp.h | 2 +- include/linux/xarray.h | 2 +- init/Kconfig | 6 ++---- kernel/futex/core.c | 2 +- kernel/user.c | 2 +- 8 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include/linux/udp.h') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index c72c7ff78fcd..d593e52e6635 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -16,10 +16,10 @@ extern int pic_mode; * Summit or generic (i.e. installer) kernels need lots of bus entries. * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ -#if CONFIG_BASE_SMALL == 0 -# define MAX_MP_BUSSES 260 -#else +#ifdef CONFIG_BASE_SMALL # define MAX_MP_BUSSES 32 +#else +# define MAX_MP_BUSSES 260 #endif #define MAX_IRQ_SOURCES 256 diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index 67e2cb7c96ee..da33c6c4691c 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -51,7 +51,7 @@ #include #define HEADER_SIZE 4u -#define CON_BUF_SIZE (CONFIG_BASE_SMALL ? 256 : PAGE_SIZE) +#define CON_BUF_SIZE (IS_ENABLED(CONFIG_BASE_SMALL) ? 256 : PAGE_SIZE) /* * Our minor space: diff --git a/include/linux/threads.h b/include/linux/threads.h index c34173e6c5f1..1674a471b0b4 100644 --- a/include/linux/threads.h +++ b/include/linux/threads.h @@ -25,13 +25,13 @@ /* * This controls the default maximum pid allocated to a process */ -#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000) +#define PID_MAX_DEFAULT (IS_ENABLED(CONFIG_BASE_SMALL) ? 0x1000 : 0x8000) /* * A maximum of 4 million PIDs should be enough for a while. 
* [NOTE: PID/TIDs are limited to 2^30 ~= 1 billion, see FUTEX_TID_MASK.] */ -#define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \ +#define PID_MAX_LIMIT (IS_ENABLED(CONFIG_BASE_SMALL) ? PAGE_SIZE * 8 : \ (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT)) /* diff --git a/include/linux/udp.h b/include/linux/udp.h index 3748e82b627b..2d21cdb41ad8 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -24,7 +24,7 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) } #define UDP_HTABLE_SIZE_MIN_PERNET 128 -#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) +#define UDP_HTABLE_SIZE_MIN (IS_ENABLED(CONFIG_BASE_SMALL) ? 128 : 256) #define UDP_HTABLE_SIZE_MAX 65536 static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) diff --git a/include/linux/xarray.h b/include/linux/xarray.h index cb571dfcf4b1..3f81ee5f9fb9 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1141,7 +1141,7 @@ static inline void xa_release(struct xarray *xa, unsigned long index) * doubled the number of slots per node, we'd get only 3 nodes per 4kB page. */ #ifndef XA_CHUNK_SHIFT -#define XA_CHUNK_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) +#define XA_CHUNK_SHIFT (IS_ENABLED(CONFIG_BASE_SMALL) ? 
4 : 6) #endif #define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT) #define XA_CHUNK_MASK (XA_CHUNK_SIZE - 1) diff --git a/init/Kconfig b/init/Kconfig index ff5b607c3218..4b517523d566 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -743,7 +743,7 @@ config LOG_CPU_MAX_BUF_SHIFT int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)" depends on SMP range 0 21 - default 0 if BASE_SMALL != 0 + default 0 if BASE_SMALL default 12 depends on PRINTK help @@ -1945,9 +1945,7 @@ config RT_MUTEXES default y if PREEMPT_RT config BASE_SMALL - int - default 0 if BASE_FULL - default 1 if !BASE_FULL + def_bool !BASE_FULL config MODULE_SIG_FORMAT def_bool n diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 1e78ef24321e..06a1f091be81 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -1150,7 +1150,7 @@ static int __init futex_init(void) unsigned int futex_shift; unsigned long i; -#if CONFIG_BASE_SMALL +#ifdef CONFIG_BASE_SMALL futex_hashsize = 16; #else futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus()); diff --git a/kernel/user.c b/kernel/user.c index 03cedc366dc9..aa1162deafe4 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -88,7 +88,7 @@ EXPORT_SYMBOL_GPL(init_user_ns); * when changing user ID's (ie setuid() and friends). */ -#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 7) +#define UIDHASH_BITS (IS_ENABLED(CONFIG_BASE_SMALL) ? 3 : 7) #define UIDHASH_SZ (1 << UIDHASH_BITS) #define UIDHASH_MASK (UIDHASH_SZ - 1) #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) -- cgit v1.2.3-59-g8ed1b