diff options
Diffstat (limited to 'net/ipv6/udp.c')
-rw-r--r-- | net/ipv6/udp.c | 410 |
1 files changed, 251 insertions, 159 deletions
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5dc439a391fe..bc65e5b7195b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -17,6 +17,7 @@ * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file. */ +#include <linux/bpf-cgroup.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> @@ -40,6 +41,7 @@ #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/raw.h> +#include <net/seg6.h> #include <net/tcp_states.h> #include <net/ip6_checksum.h> #include <net/ip6_tunnel.h> @@ -54,6 +56,20 @@ #include <trace/events/skb.h> #include "udp_impl.h" +static void udpv6_destruct_sock(struct sock *sk) +{ + udp_destruct_common(sk); + inet6_sock_destruct(sk); +} + +int udpv6_init_sock(struct sock *sk) +{ + skb_queue_head_init(&udp_sk(sk)->reader_queue); + sk->sk_destruct = udpv6_destruct_sock; + set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + return 0; +} + static u32 udp6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -103,7 +119,7 @@ static int compute_score(struct sock *sk, struct net *net, const struct in6_addr *daddr, unsigned short hnum, int dif, int sdif) { - int score; + int bound_dev_if, score; struct inet_sock *inet; bool dev_match; @@ -130,10 +146,12 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif); + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + dev_match = udp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif); if (!dev_match) return -1; - score++; + if (bound_dev_if) + score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; @@ -141,6 +159,24 @@ static int compute_score(struct sock *sk, struct net *net, return score; } +static struct sock *lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + unsigned int hnum) +{ + struct sock *reuse_sk = NULL; + u32 hash; + + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { + hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); + reuse_sk = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + } + return reuse_sk; +} + /* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -150,7 +186,6 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; int score, badness; - u32 hash = 0; result = NULL; badness = -1; @@ -158,23 +193,44 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport && - sk->sk_state != TCP_ESTABLISHED) { - hash = udp6_ehashfn(net, daddr, hnum, - saddr, sport); - - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; - } - result = sk; + result = lookup_reuseport(net, sk, skb, + saddr, sport, daddr, hnum); + /* Fall back to scoring if group has connections */ + if (result && !reuseport_has_conns(sk)) + return result; + + result = result ? : sk; badness = score; } } return result; } +static inline struct sock *udp6_lookup_run_bpf(struct net *net, + struct udp_table *udptable, + struct sk_buff *skb, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + u16 hnum, const int dif) +{ + struct sock *sk, *reuse_sk; + bool no_reuseport; + + if (udptable != &udp_table) + return NULL; /* only UDP is supported */ + + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport, + daddr, hnum, dif, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + + reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + return sk; +} + /* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -185,25 +241,42 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2; struct udp_hslot *hslot2; - struct sock *result; + struct sock *result, *sk; hash2 = ipv6_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; + /* Lookup connected or non-wildcard sockets */ result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, sdif, hslot2, skb); - if (!result) { - hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); - slot2 = hash2 & udptable->mask; + if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) + goto done; + + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + sk = udp6_lookup_run_bpf(net, udptable, skb, + saddr, sport, daddr, hnum, dif); + if (sk) { + result = sk; + goto done; + } + } - hslot2 = &udptable->hash2[slot2]; + /* Got non-wildcard socket or error on first lookup */ + if (result) + goto done; - result = udp6_lib_lookup2(net, saddr, sport, - &in6addr_any, hnum, dif, sdif, - hslot2, skb); - } + /* Lookup wildcard sockets */ + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + + result = udp6_lib_lookup2(net, saddr, sport, + &in6addr_any, hnum, dif, sdif, + hslot2, skb); +done: if (IS_ERR(result)) return NULL; return result; @@ -221,7 +294,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, inet6_sdif(skb), udptable, skb); } -struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, +struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { const struct ipv6hdr *iph = ipv6_hdr(skb); @@ -230,7 +303,6 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, &iph->daddr, dport, inet6_iif(skb), inet6_sdif(skb), &udp_table, NULL); } -EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); /* Must be called under rcu_read_lock(). * Does increment socket refcount. @@ -265,7 +337,7 @@ static int udp6_skb_len(struct sk_buff *skb) */ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) + int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -285,7 +357,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, try_again: off = sk_peek_offset(sk, flags); - skb = __skb_recv_udp(sk, flags, noblock, &off, &err); + skb = __skb_recv_udp(sk, flags, &off, &err); if (!skb) return err; @@ -334,7 +406,7 @@ try_again: if (!peeking) SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS); - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (msg->msg_name) { @@ -355,9 +427,8 @@ try_again: } *addr_len = sizeof(*sin6); - if (cgroup_bpf_enabled) - BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, - (struct sockaddr *)sin6); + BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, + (struct sockaddr *)sin6); } if (udp_sk(sk)->gro_enabled) @@ -449,12 +520,14 @@ static struct sock *__udp6_lib_err_encap(struct net *net, const struct ipv6hdr *hdr, int offset, struct udphdr *uh, struct udp_table *udptable, + struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, __be32 info) { + int (*lookup)(struct sock *sk, struct sk_buff *skb); int network_offset, transport_offset; - struct sock *sk; + struct udp_sock *up; network_offset = skb_network_offset(skb); transport_offset = skb_transport_offset(skb); @@ -465,18 +538,28 @@ static struct sock *__udp6_lib_err_encap(struct net *net, /* Transport header needs to point to the UDP header */ skb_set_transport_header(skb, offset); + if (sk) { + up = udp_sk(sk); + + lookup = READ_ONCE(up->encap_err_lookup); + if (lookup && lookup(sk, skb)) + sk = NULL; + + goto out; + } + sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, &hdr->saddr, uh->dest, inet6_iif(skb), 0, udptable, skb); if (sk) { - int (*lookup)(struct sock *sk, struct sk_buff *skb); - struct udp_sock *up = udp_sk(sk); + up = udp_sk(sk); lookup = READ_ONCE(up->encap_err_lookup); if (!lookup || lookup(sk, skb)) sk = NULL; } +out: if (!sk) { sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code, offset, info)); @@ -495,7 +578,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ipv6_pinfo *np; const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct in6_addr *saddr = &hdr->saddr; - const struct in6_addr *daddr = &hdr->daddr; + const struct in6_addr *daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr; struct udphdr *uh = (struct udphdr *)(skb->data+offset); bool tunnel = false; struct sock *sk; @@ -505,16 +588,17 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); - if (!sk) { + + if (!sk || udp_sk(sk)->encap_type) { /* No socket for error: try tunnels before discarding */ - sk = ERR_PTR(-ENOENT); if (static_branch_unlikely(&udpv6_encap_needed_key)) { sk = __udp6_lib_err_encap(net, hdr, offset, uh, - udptable, skb, + udptable, sk, skb, opt, type, code, info); if (!sk) return 0; - } + } else + sk = ERR_PTR(-ENOENT); if (IS_ERR(sk)) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), @@ -546,8 +630,11 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } /* Tunnels don't have an application socket: don't pass errors back */ - if (tunnel) + if (tunnel) { + if (udp_sk(sk)->encap_err_rcv) + udp_sk(sk)->encap_err_rcv(sk, skb, offset); goto out; + } if (!np->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) @@ -557,7 +644,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } sk->sk_err = err; - sk->sk_error_report(sk); + sk_error_report(sk); out: return 0; } @@ -577,13 +664,20 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); + enum skb_drop_reason drop_reason; /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) + if (rc == -ENOMEM) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); + drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; + } else { + UDP6_INC_STATS(sock_net(sk), + UDP_MIB_MEMERRORS, is_udplite); + drop_reason = SKB_DROP_REASON_PROTO_MEM; + } UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return -1; } @@ -599,11 +693,14 @@ static __inline__ int udpv6_err(struct sk_buff *skb, static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { + enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { + drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; + } if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); @@ -630,9 +727,9 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { - __UDP_INC_STATS(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); + __UDP6_INC_STATS(sock_net(sk), + UDP_MIB_INDATAGRAMS, + is_udplite); return -ret; } } @@ -643,7 +740,7 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ - if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { if (up->pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", @@ -662,8 +759,10 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) + if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto drop; + } udp_csum_pull_header(skb); @@ -672,11 +771,12 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) return __udpv6_queue_rcv_skb(sk, skb); csum_error: + drop_reason = SKB_DROP_REASON_UDP_CSUM; __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return -1; } @@ -693,6 +793,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) skb_list_walk_safe(segs, skb, next) { __skb_pull(skb, skb_transport_offset(skb)); + udp_post_segment_fix_csum(skb); ret = udpv6_queue_rcv_one_skb(sk, skb); if (ret > 0) ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret, @@ -716,7 +817,7 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, (inet->inet_dport && inet->inet_dport != rmt_port) || (!ipv6_addr_any(&sk->sk_v6_daddr) && !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || - !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif) || + !udp_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif) || (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))) return false; @@ -813,7 +914,7 @@ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) if (udp_sk_rx_dst_set(sk, dst)) { const struct rt6_info *rt = (const struct rt6_info *)dst; - inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); + sk->sk_rx_dst_cookie = rt6_get_cookie(rt); } } @@ -839,10 +940,12 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; const struct in6_addr *saddr, *daddr; struct net *net = dev_net(skb->dev); struct udphdr *uh; struct sock *sk; + bool refcounted; u32 ulen = 0; if (!pskb_may_pull(skb, sizeof(struct udphdr))) @@ -879,21 +982,23 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; /* Check if the socket is already available, e.g. due to early demux */ - sk = skb_steal_sock(skb); + sk = skb_steal_sock(skb, &refcounted); if (sk) { struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst != dst)) + if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp6_sk_rx_dst_set(sk, dst); if (!uh->check && !udp_sk(sk)->no_check6_rx) { - sock_put(sk); + if (refcounted) + sock_put(sk); goto report_csum_error; } ret = udp6_unicast_rcv_skb(sk, skb, uh); - sock_put(sk); + if (refcounted) + sock_put(sk); return ret; } @@ -912,6 +1017,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return udp6_unicast_rcv_skb(sk, skb, uh); } + reason = SKB_DROP_REASON_NO_SOCKET; + if (!uh->check) goto report_csum_error; @@ -924,10 +1031,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; short_packet: + if (reason == SKB_DROP_REASON_NOT_SPECIFIED) + reason = SKB_DROP_REASON_PKT_TOO_SMALL; net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", proto == IPPROTO_UDPLITE ? "-Lite" : "", saddr, ntohs(uh->source), @@ -938,10 +1047,12 @@ short_packet: report_csum_error: udp6_csum_zero_error(skb); csum_error: + if (reason == SKB_DROP_REASON_NOT_SPECIFIED) + reason = SKB_DROP_REASON_UDP_CSUM; __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; } @@ -960,7 +1071,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (sk->sk_state == TCP_ESTABLISHED && - INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif)) + inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; @@ -968,7 +1079,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, return NULL; } -INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) +void udp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct udphdr *uh; @@ -996,10 +1107,10 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = READ_ONCE(sk->sk_rx_dst); + dst = rcu_dereference(sk->sk_rx_dst); if (dst) - dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); + dst = dst_check(dst, sk->sk_rx_dst_cookie); if (dst) { /* set noref for now. * any place which wants to hold dst has to call @@ -1040,7 +1151,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, * bytes that are out of the bound specified by user in addr_len. */ if (uaddr->sa_family == AF_INET) { - if (__ipv6_only_sock(sk)) + if (ipv6_only_sock(sk)) return -EAFNOSUPPORT; return udp_pre_connect(sk, uaddr, addr_len); } @@ -1056,6 +1167,9 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, * @sk: socket we are sending on * @skb: sk_buff containing the filled-in UDP header * (checksum field must be zeroed out) + * @saddr: source address + * @daddr: destination address + * @len: length of packet */ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, const struct in6_addr *saddr, @@ -1127,7 +1241,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } @@ -1187,23 +1301,17 @@ static int udp_v6_push_pending_frames(struct sock *sk) { struct sk_buff *skb; struct udp_sock *up = udp_sk(sk); - struct flowi6 fl6; int err = 0; if (up->pending == AF_INET) return udp_push_pending_frames(sk); - /* ip6_finish_skb will release the cork, so make a copy of - * fl6 here. - */ - fl6 = inet_sk(sk)->cork.fl.u.ip6; - skb = ip6_finish_skb(sk); if (!skb) goto out; - err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base); - + err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6, + &inet_sk(sk)->cork.base); out: up->len = 0; up->pending = 0; @@ -1221,19 +1329,20 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ipv6_txoptions *opt = NULL; struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; - struct flowi6 fl6; + struct inet_cork_full cork; + struct flowi6 *fl6 = &cork.fl.u.ip6; struct dst_entry *dst; struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; bool connected = false; int ulen = len; - int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; + int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; int err; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); ipcm6_init(&ipc6); - ipc6.gso_size = up->gso_size; + ipc6.gso_size = READ_ONCE(up->gso_size); ipc6.sockc.tsflags = sk->sk_tsflags; ipc6.sockc.mark = sk->sk_mark; @@ -1278,15 +1387,12 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) msg->msg_name = &sin; msg->msg_namelen = sizeof(sin); do_udp_sendmsg: - if (__ipv6_only_sock(sk)) + if (ipv6_only_sock(sk)) return -ENETUNREACH; return udp_sendmsg(sk, msg, len); } } - if (up->pending == AF_INET) - return udp_sendmsg(sk, msg, len); - /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). */ @@ -1295,6 +1401,8 @@ do_udp_sendmsg: getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (up->pending) { + if (up->pending == AF_INET) + return udp_sendmsg(sk, msg, len); /* * There are pending frames. * The socket lock must be held while it's corked. @@ -1312,19 +1420,19 @@ do_udp_sendmsg: } ulen += sizeof(struct udphdr); - memset(&fl6, 0, sizeof(fl6)); + memset(fl6, 0, sizeof(*fl6)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; - fl6.fl6_dport = sin6->sin6_port; + fl6->fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); + fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } @@ -1341,25 +1449,24 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) - fl6.flowi6_oif = sin6->sin6_scope_id; + fl6->flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl6.fl6_dport = inet->inet_dport; + fl6->fl6_dport = inet->inet_dport; daddr = &sk->sk_v6_daddr; - fl6.flowlabel = np->flow_label; + fl6->flowlabel = np->flow_label; connected = true; } - if (!fl6.flowi6_oif) - fl6.flowi6_oif = sk->sk_bound_dev_if; + if (!fl6->flowi6_oif) + fl6->flowi6_oif = READ_ONCE(sk->sk_bound_dev_if); - if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6->flowi6_oif) + fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl6.flowi6_mark = ipc6.sockc.mark; - fl6.flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { opt = &opt_space; @@ -1369,14 +1476,14 @@ do_udp_sendmsg: err = udp_cmsg_send(sk, msg, &ipc6.gso_size); if (err > 0) - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6, &ipc6); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); + if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } @@ -1393,15 +1500,17 @@ do_udp_sendmsg: opt = ipv6_fixup_options(&opt_space, opt); ipc6.opt = opt; - fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = *daddr; - if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) - fl6.saddr = np->saddr; - fl6.fl6_sport = inet->inet_sport; + fl6->flowi6_proto = sk->sk_protocol; + fl6->flowi6_mark = ipc6.sockc.mark; + fl6->daddr = *daddr; + if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr)) + fl6->saddr = np->saddr; + fl6->fl6_sport = inet->inet_sport; - if (cgroup_bpf_enabled && !connected) { + if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, - (struct sockaddr *)sin6, &fl6.saddr); + (struct sockaddr *)sin6, + &fl6->saddr); if (err) goto out_no_dst; if (sin6) { @@ -1417,32 +1526,32 @@ do_udp_sendmsg: err = -EINVAL; goto out_no_dst; } - fl6.fl6_dport = sin6->sin6_port; - fl6.daddr = sin6->sin6_addr; + fl6->fl6_dport = sin6->sin6_port; + fl6->daddr = sin6->sin6_addr; } } - if (ipv6_addr_any(&fl6.daddr)) - fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + if (ipv6_addr_any(&fl6->daddr)) + fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ - final_p = fl6_update_dst(&fl6, opt, &final); + final_p = fl6_update_dst(fl6, opt, &final); if (final_p) connected = false; - if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) { - fl6.flowi6_oif = np->mcast_oif; + if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) { + fl6->flowi6_oif = np->mcast_oif; connected = false; - } else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + } else if (!fl6->flowi6_oif) + fl6->flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); if (ipc6.tclass < 0) ipc6.tclass = np->tclass; - fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); + fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel); - dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected); + dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -1450,7 +1559,7 @@ do_udp_sendmsg: } if (ipc6.hlimit < 0) - ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); + ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -1458,17 +1567,17 @@ back_from_confirm: /* Lockless fast path for the non-corking case */ if (!corkreq) { - struct inet_cork_full cork; struct sk_buff *skb; skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, - &fl6, (struct rt6_info *)dst, + (struct rt6_info *)dst, msg->msg_flags, &cork); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) - err = udp_v6_send_skb(skb, &fl6, &cork.base); - goto out; + err = udp_v6_send_skb(skb, fl6, &cork.base); + /* ip6_make_skb steals dst reference */ + goto out_no_dst; } lock_sock(sk); @@ -1489,7 +1598,7 @@ do_append_data: ipc6.dontfrag = np->dontfrag; up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), - &ipc6, &fl6, (struct rt6_info *)dst, + &ipc6, fl6, (struct rt6_info *)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); @@ -1524,7 +1633,7 @@ out_no_dst: do_confirm: if (msg->msg_flags & MSG_PROBE) - dst_confirm_neigh(dst, &fl6.daddr); + dst_confirm_neigh(dst, &fl6->daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; @@ -1535,6 +1644,9 @@ void udpv6_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); lock_sock(sk); + + /* protects from races with udp_abort() */ + sock_set_flag(sk, SOCK_DEAD); udp_v6_flush_pending_frames(sk); release_sock(sk); @@ -1545,8 +1657,10 @@ void udpv6_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) + if (up->encap_enabled) { static_branch_dec(&udpv6_encap_needed_key); + udp_encap_disable(); + } } inet6_destroy_sock(sk); @@ -1555,26 +1669,16 @@ void udpv6_destroy_sock(struct sock *sk) /* * Socket option code for UDP */ -int udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) +int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, + return udp_lib_setsockopt(sk, level, optname, + optval, optlen, udp_v6_push_pending_frames); return ipv6_setsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - if (level == SOL_UDP || level == SOL_UDPLITE) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, - udp_v6_push_pending_frames); - return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); -} -#endif - int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -1583,22 +1687,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, return ipv6_getsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (level == SOL_UDP || level == SOL_UDPLITE) - return udp_lib_getsockopt(sk, level, optname, optval, optlen); - return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); -} -#endif - -/* thinking of making this const? Don't. - * early_demux can change based on sysctl. - */ -static struct inet6_protocol udpv6_protocol = { - .early_demux = udp_v6_early_demux, - .early_demux_handler = udp_v6_early_demux, +static const struct inet6_protocol udpv6_protocol = { .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, @@ -1658,7 +1747,7 @@ struct proto udpv6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, - .init = udp_init_sock, + .init = udpv6_init_sock, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, @@ -1669,16 +1758,19 @@ struct proto udpv6_prot = { .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, + .put_port = udp_lib_unhash, +#ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = udp_bpf_update_proto, +#endif + .memory_allocated = &udp_memory_allocated, + .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, + .sysctl_mem = sysctl_udp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp6_sock), .h.udp_table = &udp_table, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udpv6_setsockopt, - .compat_getsockopt = compat_udpv6_getsockopt, -#endif .diag_destroy = udp_abort, }; |