From 0146dca70b877b73c5fd9c67912b8a0ca8a7bac7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 27 Apr 2020 17:59:34 +0200 Subject: xfrm: add support for UDPv6 encapsulation of ESP This patch adds support for encapsulation of ESP over UDPv6. The code is very similar to the IPv4 encapsulation implementation, and allows to easily add espintcp on IPv6 as a follow-up. Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/ipv6_stubs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net/ipv6_stubs.h') diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 3e7d2c0e79ca..f033a17b53b6 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -56,6 +56,9 @@ struct ipv6_stub { void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt); +#if IS_ENABLED(CONFIG_XFRM) + int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); +#endif struct neigh_table *nd_tbl; }; extern const struct ipv6_stub *ipv6_stub __read_mostly; -- cgit v1.2.3-59-g8ed1b From 26333c37fc285e7372f1b9461f3ae0ba3dc699c9 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 27 Apr 2020 17:59:35 +0200 Subject: xfrm: add IPv6 support for espintcp This extends espintcp to support IPv6, building on the existing code and the new UDPv6 encapsulation support. Most of the code is either reused directly (stream parser, ULP) or very similar to the IPv4 variant (net/ipv6/esp6.c changes). The separation of config options for IPv4 and IPv6 espintcp requires a bit of Kconfig gymnastics to enable the core code. Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- include/net/ipv6_stubs.h | 2 + net/ipv4/Kconfig | 1 + net/ipv6/Kconfig | 12 +++ net/ipv6/af_inet6.c | 1 + net/ipv6/esp6.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++- net/xfrm/Kconfig | 3 + net/xfrm/Makefile | 2 +- net/xfrm/espintcp.c | 56 +++++++++++--- 8 files changed, 252 insertions(+), 13 deletions(-) (limited to 'include/net/ipv6_stubs.h') diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index f033a17b53b6..1e9e0cf7dc75 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -58,6 +58,8 @@ struct ipv6_stub { bool router, bool solicited, bool override, bool inc_opt); #if IS_ENABLED(CONFIG_XFRM) int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); + int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); #endif struct neigh_table *nd_tbl; }; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 25a8888826b8..014aaa17dc79 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -384,6 +384,7 @@ config INET_ESPINTCP depends on XFRM && INET_ESP select STREAM_PARSER select NET_SOCK_MSG + select XFRM_ESPINTCP help Support for RFC 8229 encapsulation of ESP and IKE over TCP/IPv4 sockets. diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 2ccaee98fddb..468a2faadc7d 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -88,6 +88,18 @@ config INET6_ESP_OFFLOAD If unsure, say N. +config INET6_ESPINTCP + bool "IPv6: ESP in TCP encapsulation (RFC 8229)" + depends on XFRM && INET6_ESP + select STREAM_PARSER + select NET_SOCK_MSG + select XFRM_ESPINTCP + help + Support for RFC 8229 encapsulation of ESP and IKE over + TCP/IPv6 sockets. + + If unsure, say N. + config INET6_IPCOMP tristate "IPv6: IPComp transformation" select INET6_XFRM_TUNNEL diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b0b99c08350a..cbbb00bad20e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -964,6 +964,7 @@ static const struct ipv6_stub ipv6_stub_impl = { .ndisc_send_na = ndisc_send_na, #if IS_ENABLED(CONFIG_XFRM) .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, + .xfrm6_rcv_encap = xfrm6_rcv_encap, #endif .nd_tbl = &nd_tbl, }; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e8800968e209..c43592771126 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -33,6 +33,9 @@ #include #include #include +#include +#include +#include #include @@ -132,6 +135,132 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) put_page(sg_page(sg)); } +#ifdef CONFIG_INET6_ESPINTCP +struct esp_tcp_sk { + struct sock *sk; + struct rcu_head rcu; +}; + +static void esp_free_tcp_sk(struct rcu_head *head) +{ + struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); + + sock_put(esk->sk); + kfree(esk); +} + +static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) +{ + struct xfrm_encap_tmpl *encap = x->encap; + struct esp_tcp_sk *esk; + __be16 sport, dport; + struct sock *nsk; + struct sock *sk; + + sk = rcu_dereference(x->encap_sk); + if (sk && sk->sk_state == TCP_ESTABLISHED) + return sk; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (sk && sk == nsk) { + esk = kmalloc(sizeof(*esk), GFP_ATOMIC); + if (!esk) { + spin_unlock_bh(&x->lock); + return ERR_PTR(-ENOMEM); + } + RCU_INIT_POINTER(x->encap_sk, NULL); + esk->sk = sk; + call_rcu(&esk->rcu, esp_free_tcp_sk); + } + spin_unlock_bh(&x->lock); + + sk = __inet6_lookup_established(xs_net(x), &tcp_hashinfo, &x->id.daddr.in6, + dport, &x->props.saddr.in6, ntohs(sport), 0, 0); + if (!sk) + return ERR_PTR(-ENOENT); + + if (!tcp_is_ulp_esp(sk)) { + sock_put(sk); + return ERR_PTR(-EINVAL); + } + + spin_lock_bh(&x->lock); + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (encap->encap_sport != sport || + encap->encap_dport != dport) { + sock_put(sk); + sk = nsk ?: ERR_PTR(-EREMCHG); + } else if (sk == nsk) { + sock_put(sk); + } else { + rcu_assign_pointer(x->encap_sk, sk); + } + spin_unlock_bh(&x->lock); + + return sk; +} + +static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) +{ + struct sock *sk; + int err; + + rcu_read_lock(); + + sk = esp6_find_tcp_sk(x); + err = PTR_ERR_OR_ZERO(sk); + if (err) + goto out; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) + err = espintcp_queue_out(sk, skb); + else + err = espintcp_push_skb(sk, skb); + bh_unlock_sock(sk); + +out: + rcu_read_unlock(); + return err; +} + +static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + + return esp_output_tcp_finish(x, skb); +} + +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + local_bh_disable(); + err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb); + local_bh_enable(); + + /* EINPROGRESS just happens to do the right thing. It + * actually means that the skb has been consumed and + * isn't coming back. + */ + return err ?: -EINPROGRESS; +} +#else +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + kfree_skb(skb); + + return -EOPNOTSUPP; +} +#endif + static void esp_output_encap_csum(struct sk_buff *skb) { /* UDP encap with IPv6 requires a valid checksum */ @@ -181,7 +310,11 @@ static void esp_output_done(struct crypto_async_request *base, int err) secpath_reset(skb); xfrm_dev_resume(skb); } else { - xfrm_output_resume(skb, err); + if (!err && + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + esp_output_tail_tcp(x, skb); + else + xfrm_output_resume(skb, err); } } @@ -274,6 +407,41 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, return (struct ip_esp_hdr *)(uh + 1); } +#ifdef CONFIG_INET6_ESPINTCP +static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + __be16 *lenp = (void *)esp->esph; + struct ip_esp_hdr *esph; + unsigned int len; + struct sock *sk; + + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len > IP_MAX_MTU) + return ERR_PTR(-EMSGSIZE); + + rcu_read_lock(); + sk = esp6_find_tcp_sk(x); + rcu_read_unlock(); + + if (IS_ERR(sk)) + return ERR_CAST(sk); + + *lenp = htons(len); + esph = (struct ip_esp_hdr *)(lenp + 1); + + return esph; +} +#else +static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif + static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { @@ -294,6 +462,9 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, case UDP_ENCAP_ESPINUDP_NON_IKE: esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport); break; + case TCP_ENCAP_ESPINTCP: + esph = esp6_output_tcp_encap(x, skb, esp); + break; } if (IS_ERR(esph)) @@ -509,6 +680,9 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info if (sg != dsg) esp_ssg_unref(x, tmp); + if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + err = esp_output_tail_tcp(x, skb); + error_free: kfree(tmp); error: @@ -633,9 +807,13 @@ int esp6_input_done2(struct sk_buff *skb, int err) const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct xfrm_encap_tmpl *encap = x->encap; struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len); + struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len); __be16 source; switch (x->encap->encap_type) { + case TCP_ENCAP_ESPINTCP: + source = th->source; + break; case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP_NON_IKE: source = uh->source; @@ -1039,6 +1217,14 @@ static int esp6_init_state(struct xfrm_state *x) case UDP_ENCAP_ESPINUDP_NON_IKE: x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); break; +#ifdef CONFIG_INET6_ESPINTCP + case TCP_ENCAP_ESPINTCP: + /* only the length field, TCP encap is done by + * the socket + */ + x->props.header_len += 2; + break; +#endif } } diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 6921a18201a0..b7fd9c838416 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -99,4 +99,7 @@ config NET_KEY_MIGRATE If unsure, say N. +config XFRM_ESPINTCP + bool + endif # INET diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 212a4fcb4a88..2d4bb4b9f75e 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -11,4 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o -obj-$(CONFIG_INET_ESPINTCP) += espintcp.o +obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 037ea156d2f9..2132a3b6df0f 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -6,6 +6,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, struct sock *sk) @@ -31,7 +34,12 @@ static void handle_esp(struct sk_buff *skb, struct sock *sk) rcu_read_lock(); skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); local_bh_disable(); - xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); + else +#endif + xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); local_bh_enable(); rcu_read_unlock(); } @@ -347,6 +355,9 @@ unlock: static struct proto espintcp_prot __ro_after_init; static struct proto_ops espintcp_ops __ro_after_init; +static struct proto espintcp6_prot; +static struct proto_ops espintcp6_ops; +static DEFINE_MUTEX(tcpv6_prot_mutex); static void espintcp_data_ready(struct sock *sk) { @@ -384,10 +395,14 @@ static void espintcp_destruct(struct sock *sk) bool tcp_is_ulp_esp(struct sock *sk) { - return sk->sk_prot == &espintcp_prot; + return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot; } EXPORT_SYMBOL_GPL(tcp_is_ulp_esp); +static void build_protos(struct proto *espintcp_prot, + struct proto_ops *espintcp_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops); static int espintcp_init_sk(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -415,8 +430,19 @@ static int espintcp_init_sk(struct sock *sk) strp_check_rcv(&ctx->strp); skb_queue_head_init(&ctx->ike_queue); skb_queue_head_init(&ctx->out_queue); - sk->sk_prot = &espintcp_prot; - sk->sk_socket->ops = &espintcp_ops; + + if (sk->sk_family == AF_INET) { + sk->sk_prot = &espintcp_prot; + sk->sk_socket->ops = &espintcp_ops; + } else { + mutex_lock(&tcpv6_prot_mutex); + if (!espintcp6_prot.recvmsg) + build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops); + mutex_unlock(&tcpv6_prot_mutex); + + sk->sk_prot = &espintcp6_prot; + sk->sk_socket->ops = &espintcp6_ops; + } ctx->saved_data_ready = sk->sk_data_ready; ctx->saved_write_space = sk->sk_write_space; sk->sk_data_ready = espintcp_data_ready; @@ -489,6 +515,20 @@ static __poll_t espintcp_poll(struct file *file, struct socket *sock, return mask; } +static void build_protos(struct proto *espintcp_prot, + struct proto_ops *espintcp_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops) +{ + memcpy(espintcp_prot, orig_prot, sizeof(struct proto)); + memcpy(espintcp_ops, orig_ops, sizeof(struct proto_ops)); + espintcp_prot->sendmsg = espintcp_sendmsg; + espintcp_prot->recvmsg = espintcp_recvmsg; + espintcp_prot->close = espintcp_close; + espintcp_prot->release_cb = espintcp_release; + espintcp_ops->poll = espintcp_poll; +} + static struct tcp_ulp_ops espintcp_ulp __read_mostly = { .name = "espintcp", .owner = THIS_MODULE, @@ -497,13 +537,7 @@ static struct tcp_ulp_ops espintcp_ulp __read_mostly = { void __init espintcp_init(void) { - memcpy(&espintcp_prot, &tcp_prot, sizeof(tcp_prot)); - memcpy(&espintcp_ops, &inet_stream_ops, sizeof(inet_stream_ops)); - espintcp_prot.sendmsg = espintcp_sendmsg; - espintcp_prot.recvmsg = espintcp_recvmsg; - espintcp_prot.close = espintcp_close; - espintcp_prot.release_cb = espintcp_release; - espintcp_ops.poll = espintcp_poll; + build_protos(&espintcp_prot, &espintcp_ops, &tcp_prot, &inet_stream_ops); tcp_register_ulp(&espintcp_ulp); } -- cgit v1.2.3-59-g8ed1b From 11dd74b338bf83f8bca70b57bad33a903fedfa6e Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Mon, 27 Apr 2020 13:56:45 -0700 Subject: net: ipv6: new arg skip_notify to ip6_rt_del Used in subsequent work to skip route delete notifications on nexthop deletes. Suggested-by: David Ahern Signed-off-by: Roopa Prabhu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- include/net/ipv6_stubs.h | 2 +- net/ipv4/nexthop.c | 2 +- net/ipv6/addrconf.c | 12 ++++++------ net/ipv6/addrconf_core.c | 3 ++- net/ipv6/anycast.c | 4 ++-- net/ipv6/ndisc.c | 2 +- net/ipv6/route.c | 11 +++++++---- 8 files changed, 21 insertions(+), 17 deletions(-) (limited to 'include/net/ipv6_stubs.h') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9947eb1e9eb6..e525f003e619 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -123,7 +123,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg); int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); int ip6_ins_rt(struct net *net, struct fib6_info *f6i); -int ip6_del_rt(struct net *net, struct fib6_info *f6i); +int ip6_del_rt(struct net *net, struct fib6_info *f6i, bool skip_notify); void rt6_flush_exceptions(struct fib6_info *f6i); void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 3e7d2c0e79ca..a5f7c12c326a 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -48,7 +48,7 @@ struct ipv6_stub { struct netlink_ext_ack *extack); void (*fib6_nh_release)(struct fib6_nh *fib6_nh); void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); - int (*ip6_del_rt)(struct net *net, struct fib6_info *rt); + int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify); void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, struct nl_info *info); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index fdfca534d094..9999687ad6dc 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -784,7 +784,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { /* __ip6_del_rt does a release, so do a hold here */ fib6_info_hold(f6i); - ipv6_stub->ip6_del_rt(net, f6i); + ipv6_stub->ip6_del_rt(net, f6i, false); } } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 27b4fb6e452b..2c4f20ec1e2a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1238,7 +1238,7 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, ifp->idev->dev, 0, RTF_DEFAULT, true); if (f6i) { if (del_rt) - ip6_del_rt(dev_net(ifp->idev->dev), f6i); + ip6_del_rt(dev_net(ifp->idev->dev), f6i, false); else { if (!(f6i->fib6_flags & RTF_EXPIRES)) fib6_set_expires(f6i, expires); @@ -2718,7 +2718,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) if (rt) { /* Autoconf prefix route */ if (valid_lft == 0) { - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); rt = NULL; } else if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ @@ -3813,7 +3813,7 @@ restart: spin_unlock_bh(&ifa->lock); if (rt) - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); @@ -4652,7 +4652,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF; if (f6i->fib6_metric != prio) { /* delete old one */ - ip6_del_rt(dev_net(ifp->idev->dev), f6i); + ip6_del_rt(dev_net(ifp->idev->dev), f6i, false); /* add new one */ addrconf_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr, @@ -6073,10 +6073,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ifp->idev->dev, 0, 0, false); if (rt) - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); } if (ifp->rt) { - ip6_del_rt(net, ifp->rt); + ip6_del_rt(net, ifp->rt, false); ifp->rt = NULL; } rt_genid_bump_ipv6(net); diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index ea00ce3d4117..9ebf3fe0d2b1 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -185,7 +185,8 @@ static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, return -EAFNOSUPPORT; } -static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt) +static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt, + bool skip_notify) { return -EAFNOSUPPORT; } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index fed91ab7ec46..893261230ffc 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -364,7 +364,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); - ip6_del_rt(dev_net(idev->dev), aca->aca_rt); + ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); aca_put(aca); return 0; @@ -393,7 +393,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) addrconf_leave_solict(idev, &aca->aca_addr); - ip6_del_rt(dev_net(idev->dev), aca->aca_rt); + ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); aca_put(aca); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 1ecd4e9b0bdf..2d09c4da03ee 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1302,7 +1302,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) } } if (rt && lifetime == 0) { - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); rt = NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 310cbddaa533..486c36a14f24 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -984,7 +984,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, gwaddr, dev); if (rt && !lifetime) { - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); rt = NULL; } @@ -3729,9 +3729,12 @@ out: return err; } -int ip6_del_rt(struct net *net, struct fib6_info *rt) +int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify) { - struct nl_info info = { .nl_net = net }; + struct nl_info info = { + .nl_net = net, + .skip_notify = skip_notify + }; return __ip6_del_rt(rt, &info); } @@ -4252,7 +4255,7 @@ restart: (!idev || idev->cnf.accept_ra != 2) && fib6_info_hold_safe(rt)) { rcu_read_unlock(); - ip6_del_rt(net, rt); + ip6_del_rt(net, rt, false); goto restart; } } -- cgit v1.2.3-59-g8ed1b From 3e50ddd8b8d5067796fc87cbbb25c71451ccb385 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 May 2020 10:06:06 +0200 Subject: xfrm: expose local_rxpmtu via ipv6_stubs We cannot call this function from the core kernel unless we would force CONFIG_IPV6=y. Therefore expose this via ipv6_stubs so we can call it from net/xfrm in the followup patch. Since the call is expected to be unlikely, no extra code for the IPV6=y case is added and we will always eat the indirection cost. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/ipv6_stubs.h | 1 + include/net/xfrm.h | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/xfrm6_output.c | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net/ipv6_stubs.h') diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 1e9e0cf7dc75..d8ab3872aa2a 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -57,6 +57,7 @@ struct ipv6_stub { const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt); #if IS_ENABLED(CONFIG_XFRM) + void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu); int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb); int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 8b956528b6e6..10295ab4cdfb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1608,6 +1608,7 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); #ifdef CONFIG_XFRM +void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm_user_policy(struct sock *sk, int optname, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index cbbb00bad20e..aa4882929fd0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -963,6 +963,7 @@ static const struct ipv6_stub ipv6_stub_impl = { .udpv6_encap_enable = udpv6_encap_enable, .ndisc_send_na = ndisc_send_na, #if IS_ENABLED(CONFIG_XFRM) + .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, .xfrm6_rcv_encap = xfrm6_rcv_encap, #endif diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 855078a43fc7..23e2b52cfba6 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -40,7 +40,7 @@ static int xfrm6_local_dontfrag(struct sk_buff *skb) return 0; } -static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) +void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; struct sock *sk = skb->sk; -- cgit v1.2.3-59-g8ed1b From cb0721c7e200750907bb8ef59b12646a5cb2dadf Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 8 May 2020 10:46:10 -0700 Subject: net: Refactor arguments of inet{,6}_bind The intent is to add an additional bind parameter in the next commit. Instead of adding another argument, let's convert all existing flag arguments into an extendable bit field. No functional changes. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Andrey Ignatov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200508174611.228805-4-sdf@google.com --- include/net/inet_common.h | 6 +++++- include/net/ipv6_stubs.h | 2 +- net/core/filter.c | 6 ++++-- net/ipv4/af_inet.c | 10 +++++----- net/ipv6/af_inet6.c | 10 +++++----- 5 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include/net/ipv6_stubs.h') diff --git a/include/net/inet_common.h b/include/net/inet_common.h index ae2ba897675c..c38f4f7d660a 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -35,8 +35,12 @@ int inet_shutdown(struct socket *sock, int how); int inet_listen(struct socket *sock, int backlog); void inet_sock_destruct(struct sock *sk); int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); +/* Don't allocate port at this moment, defer to connect. */ +#define BIND_FORCE_ADDRESS_NO_PORT (1 << 0) +/* Grab and release socket lock. */ +#define BIND_WITH_LOCK (1 << 1) int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, - bool force_bind_address_no_port, bool with_lock); + u32 flags); int inet_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index a5f7c12c326a..6e622dd3122e 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -63,7 +63,7 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly; /* A stub used by bpf helpers. Similarly ugly as ipv6_stub */ struct ipv6_bpf_stub { int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len, - bool force_bind_address_no_port, bool with_lock); + u32 flags); struct sock *(*udp6_lib_lookup)(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, diff --git a/net/core/filter.c b/net/core/filter.c index dfaf5df13722..fa9ddab5dd1f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4538,7 +4538,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, return err; if (((struct sockaddr_in *)addr)->sin_port != htons(0)) return err; - return __inet_bind(sk, addr, addr_len, true, false); + return __inet_bind(sk, addr, addr_len, + BIND_FORCE_ADDRESS_NO_PORT); #if IS_ENABLED(CONFIG_IPV6) } else if (addr->sa_family == AF_INET6) { if (addr_len < SIN6_LEN_RFC2133) @@ -4548,7 +4549,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, /* ipv6_bpf_stub cannot be NULL, since it's called from * bpf_cgroup_inet6_connect hook and ipv6 is already loaded */ - return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false); + return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, + BIND_FORCE_ADDRESS_NO_PORT); #endif /* CONFIG_IPV6 */ } #endif /* CONFIG_INET */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6177c4ba0037..68e74b1b0f26 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -450,12 +450,12 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (err) return err; - return __inet_bind(sk, uaddr, addr_len, false, true); + return __inet_bind(sk, uaddr, addr_len, BIND_WITH_LOCK); } EXPORT_SYMBOL(inet_bind); int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, - bool force_bind_address_no_port, bool with_lock) + u32 flags) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct inet_sock *inet = inet_sk(sk); @@ -506,7 +506,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, * would be illegal to use them (multicast/broadcast) in * which case the sending device address is used. */ - if (with_lock) + if (flags & BIND_WITH_LOCK) lock_sock(sk); /* Check these errors (active socket, double bind). */ @@ -520,7 +520,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, /* Make sure we are allowed to bind here. */ if (snum || !(inet->bind_address_no_port || - force_bind_address_no_port)) { + (flags & BIND_FORCE_ADDRESS_NO_PORT))) { if (sk->sk_prot->get_port(sk, snum)) { inet->inet_saddr = inet->inet_rcv_saddr = 0; err = -EADDRINUSE; @@ -543,7 +543,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, sk_dst_reset(sk); err = 0; out_release_sock: - if (with_lock) + if (flags & BIND_WITH_LOCK) release_sock(sk); out: return err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 345baa0a754f..552c2592b81c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -273,7 +273,7 @@ out_rcu_unlock: } static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, - bool force_bind_address_no_port, bool with_lock) + u32 flags) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; struct inet_sock *inet = inet_sk(sk); @@ -297,7 +297,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; - if (with_lock) + if (flags & BIND_WITH_LOCK) lock_sock(sk); /* Check these errors (active socket, double bind). */ @@ -400,7 +400,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, /* Make sure we are allowed to bind here. */ if (snum || !(inet->bind_address_no_port || - force_bind_address_no_port)) { + (flags & BIND_FORCE_ADDRESS_NO_PORT))) { if (sk->sk_prot->get_port(sk, snum)) { sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); @@ -423,7 +423,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, inet->inet_dport = 0; inet->inet_daddr = 0; out: - if (with_lock) + if (flags & BIND_WITH_LOCK) release_sock(sk); return err; out_unlock: @@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (err) return err; - return __inet6_bind(sk, uaddr, addr_len, false, true); + return __inet6_bind(sk, uaddr, addr_len, BIND_WITH_LOCK); } EXPORT_SYMBOL(inet6_bind); -- cgit v1.2.3-59-g8ed1b