From 11052589cf5c0bab3b4884d423d5f60c38fcf25d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 13 Jul 2022 10:52:07 -0700 Subject: tcp/udp: Make early_demux back namespacified. Commit e21145a9871a ("ipv4: namespacify ip_early_demux sysctl knob") made it possible to enable/disable early_demux on a per-netns basis. Then, we introduced two knobs, tcp_early_demux and udp_early_demux, to switch it for TCP/UDP in commit dddb64bcb346 ("net: Add sysctl to toggle early demux for tcp and udp"). However, the .proc_handler() was wrong and actually disabled us from changing the behaviour in each netns. We can execute early_demux if net.ipv4.ip_early_demux is on and each proto .early_demux() handler is not NULL. When we toggle (tcp|udp)_early_demux, the change itself is saved in each netns variable, but the .early_demux() handler is a global variable, so the handler is switched based on the init_net's sysctl variable. Thus, netns (tcp|udp)_early_demux knobs have nothing to do with the logic. Whether we CAN execute proto .early_demux() is always decided by init_net's sysctl knob, and whether we DO it or not is by each netns ip_early_demux knob. This patch namespacifies (tcp|udp)_early_demux again. For now, the users of the .early_demux() handler are TCP and UDP only, and they are called directly to avoid retpoline. So, we can remove the .early_demux() handler from inet6?_protos and need not dereference them in ip6?_rcv_finish_core(). If another proto needs .early_demux(), we can restore it at that time. Fixes: dddb64bcb346 ("net: Add sysctl to toggle early demux for tcp and udp") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20220713175207.7727-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_input.c | 23 +++++++++++++---------- net/ipv6/tcp_ipv6.c | 9 ++------- net/ipv6/udp.c | 9 ++------- 3 files changed, 17 insertions(+), 24 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 0322cc86b84e..e1ebf5e42ebe 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -45,20 +45,23 @@ #include #include -INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *)); static void ip6_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb) { - void (*edemux)(struct sk_buff *skb); - - if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { - const struct inet6_protocol *ipprot; - - ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); - if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) - INDIRECT_CALL_2(edemux, tcp_v6_early_demux, - udp_v6_early_demux, skb); + if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && + !skb_dst(skb) && !skb->sk) { + switch (ipv6_hdr(skb)->nexthdr) { + case IPPROTO_TCP: + if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) + tcp_v6_early_demux(skb); + break; + case IPPROTO_UDP: + if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) + udp_v6_early_demux(skb); + break; + } } + if (!skb_valid_dst(skb)) ip6_route_input(skb); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f37dd4aa91c6..9d3ede293258 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1822,7 +1822,7 @@ do_time_wait: goto discard_it; } -INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) +void tcp_v6_early_demux(struct sk_buff *skb) { const struct ipv6hdr *hdr; const struct tcphdr *th; @@ -2176,12 +2176,7 @@ struct proto tcpv6_prot = { }; EXPORT_SYMBOL_GPL(tcpv6_prot); -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct inet6_protocol tcpv6_protocol = { - .early_demux = tcp_v6_early_demux, - .early_demux_handler = tcp_v6_early_demux, +static const struct inet6_protocol tcpv6_protocol = { .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 55afd7f39c04..e2f2e087a753 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1052,7 +1052,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, return NULL; } -INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) +void udp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct udphdr *uh; @@ -1660,12 +1660,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, return ipv6_getsockopt(sk, level, optname, optval, optlen); } -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct inet6_protocol udpv6_protocol = { - .early_demux = udp_v6_early_demux, - .early_demux_handler = udp_v6_early_demux, +static const struct inet6_protocol udpv6_protocol = { .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, -- cgit v1.2.3-59-g8ed1b