diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/cipso_ipv4.c | 20 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 4 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 7 | ||||
-rw-r--r-- | net/ipv4/inet_fragment.c | 44 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_input.c | 9 | ||||
-rw-r--r-- | net/ipv4/ip_options.c | 22 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 6 | ||||
-rw-r--r-- | net/ipv4/netlink.c | 17 | ||||
-rw-r--r-- | net/ipv4/route.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 21 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 17 |
15 files changed, 107 insertions, 77 deletions
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 777fa3b7fb13..f0165c5f376b 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -667,7 +667,8 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) case CIPSO_V4_MAP_PASS: return 0; case CIPSO_V4_MAP_TRANS: - if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) + if ((level < doi_def->map.std->lvl.cipso_size) && + (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)) return 0; break; } @@ -1735,13 +1736,26 @@ validate_return: */ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { + unsigned char optbuf[sizeof(struct ip_options) + 40]; + struct ip_options *opt = (struct ip_options *)optbuf; + if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; + /* + * We might be called above the IP layer, + * so we can not use icmp_send and IPCB here. + */ + + memset(opt, 0, sizeof(struct ip_options)); + opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + return; + if (gateway) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt); else - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt); } /** diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index fe4f6a624238..ed14ec245584 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -710,6 +710,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, case RTA_GATEWAY: cfg->fc_gw = nla_get_be32(attr); break; + case RTA_VIA: + NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute"); + err = -EINVAL; + goto errout; case RTA_PRIORITY: cfg->fc_priority = nla_get_u32(attr); break; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 364cfe5e414b..f3a5893b1e86 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -570,7 +570,8 @@ relookup_failed: * MUST reply to only the first fragment. */ -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + const struct ip_options *opt) { struct iphdr *iph; int room; @@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph->tos; mark = IP4_REPLY_MARK(net, skb_in->mark); - if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in)) + if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt)) goto out_unlock; @@ -742,7 +743,7 @@ out_bh_enable: local_bh_enable(); out:; } -EXPORT_SYMBOL(icmp_send); +EXPORT_SYMBOL(__icmp_send); static void icmp_socket_deliver(struct sk_buff *skb, u32 info) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 9f69411251d0..737808e27f8b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -203,7 +203,6 @@ EXPORT_SYMBOL(inet_frag_rbtree_purge); void inet_frag_destroy(struct inet_frag_queue *q) { - struct sk_buff *fp; struct netns_frags *nf; unsigned int sum, sum_truesize = 0; struct inet_frags *f; @@ -212,20 +211,9 @@ void inet_frag_destroy(struct inet_frag_queue *q) WARN_ON(del_timer(&q->timer) != 0); /* Release all fragment data. */ - fp = q->fragments; nf = q->net; f = nf->f; - if (fp) { - do { - struct sk_buff *xp = fp->next; - - sum_truesize += fp->truesize; - kfree_skb(fp); - fp = xp; - } while (fp); - } else { - sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); - } + sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); sum = sum_truesize + f->qsize; call_rcu(&q->rcu, inet_frag_destroy_rcu); @@ -489,26 +477,20 @@ EXPORT_SYMBOL(inet_frag_reasm_finish); struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q) { - struct sk_buff *head; + struct sk_buff *head, *skb; - if (q->fragments) { - head = q->fragments; - q->fragments = head->next; - } else { - struct sk_buff *skb; + head = skb_rb_first(&q->rb_fragments); + if (!head) + return NULL; + skb = FRAG_CB(head)->next_frag; + if (skb) + rb_replace_node(&head->rbnode, &skb->rbnode, + &q->rb_fragments); + else + rb_erase(&head->rbnode, &q->rb_fragments); + memset(&head->rbnode, 0, sizeof(head->rbnode)); + barrier(); - head = skb_rb_first(&q->rb_fragments); - if (!head) - return NULL; - skb = FRAG_CB(head)->next_frag; - if (skb) - rb_replace_node(&head->rbnode, &skb->rbnode, - &q->rb_fragments); - else - rb_erase(&head->rbnode, &q->rb_fragments); - memset(&head->rbnode, 0, sizeof(head->rbnode)); - barrier(); - } if (head == q->fragments_tail) q->fragments_tail = NULL; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 486ecb0aeb87..cf2b0a6a3337 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -261,7 +261,6 @@ static int ip_frag_reinit(struct ipq *qp) qp->q.flags = 0; qp->q.len = 0; qp->q.meat = 0; - qp->q.fragments = NULL; qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; qp->q.last_run_head = NULL; @@ -451,7 +450,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, ip_send_check(iph); __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS); - qp->q.fragments = NULL; qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; qp->q.last_run_head = NULL; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0f4925a0d6b2..fd219f7bd3ea 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -578,7 +578,7 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) key = &info->key; ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), key->tos, 0, - skb->mark); + skb->mark, skb_get_hash(skb)); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index cd6b5694f99e..ecce2dc78f17 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -307,11 +307,10 @@ drop: } static int ip_rcv_finish_core(struct net *net, struct sock *sk, - struct sk_buff *skb) + struct sk_buff *skb, struct net_device *dev) { const struct iphdr *iph = ip_hdr(skb); int (*edemux)(struct sk_buff *skb); - struct net_device *dev = skb->dev; struct rtable *rt; int err; @@ -400,6 +399,7 @@ drop_error: static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + struct net_device *dev = skb->dev; int ret; /* if ingress device is enslaved to an L3 master device pass the @@ -409,7 +409,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) if (!skb) return NET_RX_SUCCESS; - ret = ip_rcv_finish_core(net, sk, skb); + ret = ip_rcv_finish_core(net, sk, skb, dev); if (ret != NET_RX_DROP) ret = dst_input(skb); return ret; @@ -545,6 +545,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk, INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { + struct net_device *dev = skb->dev; struct dst_entry *dst; skb_list_del_init(skb); @@ -554,7 +555,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk, skb = l3mdev_ip_rcv(skb); if (!skb) continue; - if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP) + if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP) continue; dst = skb_dst(skb); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ed194d46c00e..32a35043c9f5 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -251,8 +251,9 @@ static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb) * If opt == NULL, then skb->data should point to IP header. */ -int ip_options_compile(struct net *net, - struct ip_options *opt, struct sk_buff *skb) +int __ip_options_compile(struct net *net, + struct ip_options *opt, struct sk_buff *skb, + __be32 *info) { __be32 spec_dst = htonl(INADDR_ANY); unsigned char *pp_ptr = NULL; @@ -468,11 +469,22 @@ eol: return 0; error: - if (skb) { - icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24)); - } + if (info) + *info = htonl((pp_ptr-iph)<<24); return -EINVAL; } + +int ip_options_compile(struct net *net, + struct ip_options *opt, struct sk_buff *skb) +{ + int ret; + __be32 info; + + ret = __ip_options_compile(net, opt, skb, &info); + if (ret != 0 && skb) + icmp_send(skb, ICMP_PARAMETERPROB, 0, info); + return ret; +} EXPORT_SYMBOL(ip_options_compile); /* diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 2973067b831d..2756fb725bf0 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -310,7 +310,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, RT_TOS(iph->tos), tunnel->parms.link, - tunnel->fwmark); + tunnel->fwmark, 0); rt = ip_route_output_key(tunnel->net, &fl4); if (!IS_ERR(rt)) { @@ -584,7 +584,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), RT_TOS(tos), - 0, skb->mark); + 0, skb->mark, skb_get_hash(skb)); if (tunnel->encap.type != TUNNEL_ENCAP_NONE) goto tx_error; @@ -744,7 +744,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, - tunnel->fwmark); + tunnel->fwmark, skb_get_hash(skb)); if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; diff --git a/net/ipv4/netlink.c b/net/ipv4/netlink.c index f86bb4f06609..d8e3a1fb8e82 100644 --- a/net/ipv4/netlink.c +++ b/net/ipv4/netlink.c @@ -3,9 +3,10 @@ #include <linux/types.h> #include <net/net_namespace.h> #include <net/netlink.h> +#include <linux/in6.h> #include <net/ip.h> -int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, +int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family, struct netlink_ext_ack *extack) { *ip_proto = nla_get_u8(attr); @@ -13,11 +14,19 @@ int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, switch (*ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: + return 0; case IPPROTO_ICMP: + if (family != AF_INET) + break; + return 0; +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: + if (family != AF_INET6) + break; return 0; - default: - NL_SET_ERR_MSG(extack, "Unsupported ip proto"); - return -EOPNOTSUPP; +#endif } + NL_SET_ERR_MSG(extack, "Unsupported ip proto"); + return -EOPNOTSUPP; } EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ecc12a768191..738ff0a1a048 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1820,6 +1820,7 @@ out: int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys) { + u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0; struct flow_keys hash_keys; u32 mhash; @@ -1870,6 +1871,9 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, } mhash = flow_hash_from_keys(&hash_keys); + if (multipath_hash) + mhash = jhash_2words(mhash, multipath_hash, 0); + return mhash >> 1; } #endif /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -2872,7 +2876,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (tb[RTA_IP_PROTO]) { err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], - &ip_proto, extack); + &ip_proto, AF_INET, extack); if (err) return err; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 769508c75dce..ad07dd71063d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1412,7 +1412,8 @@ do_fault: /* It is the one place in all of TCP, except connection * reset, where we can be unlinking the send_head. */ - tcp_check_send_head(sk, skb); + if (tcp_write_queue_empty(sk)) + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); sk_wmem_free_skb(sk, skb); } @@ -3698,7 +3699,7 @@ bool tcp_alloc_md5sig_pool(void) if (!tcp_md5sig_pool_populated) { __tcp_alloc_md5sig_pool(); if (tcp_md5sig_pool_populated) - static_key_slow_inc(&tcp_md5_needed); + static_branch_inc(&tcp_md5_needed); } mutex_unlock(&tcp_md5sig_mutex); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4010ae3644f3..831d844a27ca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -973,7 +973,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) * We need to maintain these in the sk structure. */ -struct static_key tcp_md5_needed __read_mostly; +DEFINE_STATIC_KEY_FALSE(tcp_md5_needed); EXPORT_SYMBOL(tcp_md5_needed); /* Find the Key structure for an address. */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 182595e2d40f..79900f783e0d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -294,12 +294,15 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * so the timewait ack generating code has the key. */ do { - struct tcp_md5sig_key *key; tcptw->tw_md5_key = NULL; - key = tp->af_specific->md5_lookup(sk, sk); - if (key) { - tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); - BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()); + if (static_branch_unlikely(&tcp_md5_needed)) { + struct tcp_md5sig_key *key; + + key = tp->af_specific->md5_lookup(sk, sk); + if (key) { + tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); + BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()); + } } } while (0); #endif @@ -338,10 +341,12 @@ EXPORT_SYMBOL(tcp_time_wait); void tcp_twsk_destructor(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG - struct tcp_timewait_sock *twsk = tcp_twsk(sk); + if (static_branch_unlikely(&tcp_md5_needed)) { + struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_key) - kfree_rcu(twsk->tw_md5_key, rcu); + if (twsk->tw_md5_key) + kfree_rcu(twsk->tw_md5_key, rcu); + } #endif } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e72aa0ff5785..4522579aaca2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -597,7 +597,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, *md5 = NULL; #ifdef CONFIG_TCP_MD5SIG - if (static_key_false(&tcp_md5_needed) && + if (static_branch_unlikely(&tcp_md5_needed) && rcu_access_pointer(tp->md5sig_info)) { *md5 = tp->af_specific->md5_lookup(sk, sk); if (*md5) { @@ -734,7 +734,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb *md5 = NULL; #ifdef CONFIG_TCP_MD5SIG - if (static_key_false(&tcp_md5_needed) && + if (static_branch_unlikely(&tcp_md5_needed) && rcu_access_pointer(tp->md5sig_info)) { *md5 = tp->af_specific->md5_lookup(sk, sk); if (*md5) { @@ -1846,17 +1846,17 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp, * know that all the data is in scatter-gather pages, and that the * packet has never been sent out before (and thus is not cloned). */ -static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue, - struct sk_buff *skb, unsigned int len, +static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now, gfp_t gfp) { - struct sk_buff *buff; int nlen = skb->len - len; + struct sk_buff *buff; u8 flags; /* All of a TSO frame must be composed of paged data. */ if (skb->len != skb->data_len) - return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp); + return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, + skb, len, mss_now, gfp); buff = sk_stream_alloc_skb(sk, 0, gfp, true); if (unlikely(!buff)) @@ -1892,7 +1892,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue, /* Link BUFF into the send queue. */ __skb_header_release(buff); - tcp_insert_write_queue_after(skb, buff, sk, tcp_queue); + tcp_insert_write_queue_after(skb, buff, sk, TCP_FRAG_IN_WRITE_QUEUE); return 0; } @@ -2391,8 +2391,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, nonagle); if (skb->len > limit && - unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, - skb, limit, mss_now, gfp))) + unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; if (tcp_small_queue_check(sk, skb, 0)) |