Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/cipso_ipv4.c    | 20
-rw-r--r--  net/ipv4/fib_frontend.c  |  4
-rw-r--r--  net/ipv4/icmp.c          |  7
-rw-r--r--  net/ipv4/inet_fragment.c | 44
-rw-r--r--  net/ipv4/ip_fragment.c   |  2
-rw-r--r--  net/ipv4/ip_gre.c        |  2
-rw-r--r--  net/ipv4/ip_input.c      |  9
-rw-r--r--  net/ipv4/ip_options.c    | 22
-rw-r--r--  net/ipv4/ip_tunnel.c     |  6
-rw-r--r--  net/ipv4/netlink.c       | 17
-rw-r--r--  net/ipv4/route.c         |  6
-rw-r--r--  net/ipv4/tcp.c           |  5
-rw-r--r--  net/ipv4/tcp_ipv4.c      |  2
-rw-r--r--  net/ipv4/tcp_minisocks.c | 21
-rw-r--r--  net/ipv4/tcp_output.c    | 17
15 files changed, 107 insertions(+), 77 deletions(-)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 777fa3b7fb13..f0165c5f376b 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -667,7 +667,8 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level)
case CIPSO_V4_MAP_PASS:
return 0;
case CIPSO_V4_MAP_TRANS:
- if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)
+ if ((level < doi_def->map.std->lvl.cipso_size) &&
+ (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL))
return 0;
break;
}
@@ -1735,13 +1736,26 @@ validate_return:
*/
void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
{
+ unsigned char optbuf[sizeof(struct ip_options) + 40];
+ struct ip_options *opt = (struct ip_options *)optbuf;
+
if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
return;
+ /*
+ * We might be called above the IP layer,
+ * so we can not use icmp_send and IPCB here.
+ */
+
+ memset(opt, 0, sizeof(struct ip_options));
+ opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+ if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+ return;
+
if (gateway)
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+ __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
else
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+ __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
}
/**
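The 40 in optbuf above is the IPv4 option-space maximum: ihl is a 4-bit word count, so a header is at most 15 * 4 = 60 bytes, 20 of which are fixed. A minimal sketch of the same on-stack pattern, written with the kernel's named constant (MAX_IPOPTLEN, which is 40) instead of the bare number:

#include <linux/ip.h>		/* MAX_IPOPTLEN == 40 */
#include <net/inet_sock.h>	/* struct ip_options */

/* struct ip_options ends in a zero-length __data[] array, so a caller
 * that cannot reach IPCB reserves worst-case option space behind it. */
unsigned char optbuf[sizeof(struct ip_options) + MAX_IPOPTLEN];
struct ip_options *opt = (struct ip_options *)optbuf;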
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index fe4f6a624238..ed14ec245584 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -710,6 +710,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
case RTA_GATEWAY:
cfg->fc_gw = nla_get_be32(attr);
break;
+ case RTA_VIA:
+ NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute");
+ err = -EINVAL;
+ goto errout;
case RTA_PRIORITY:
cfg->fc_priority = nla_get_u32(attr);
break;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 364cfe5e414b..f3a5893b1e86 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -570,7 +570,8 @@ relookup_failed:
* MUST reply to only the first fragment.
*/
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+ const struct ip_options *opt)
{
struct iphdr *iph;
int room;
@@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
iph->tos;
mark = IP4_REPLY_MARK(net, skb_in->mark);
- if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
+ if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
goto out_unlock;
@@ -742,7 +743,7 @@ out_bh_enable:
local_bh_enable();
out:;
}
-EXPORT_SYMBOL(icmp_send);
+EXPORT_SYMBOL(__icmp_send);
static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
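The rename need not break existing icmp_send() callers: the matching header change lives in include/net/icmp.h, outside this net/ipv4 diffstat. Sketched from that companion change, the old entry point presumably survives as an inline wrapper passing the options already parsed into the skb's control block:

static inline void icmp_send(struct sk_buff *skb_in, int type, int code,
			     __be32 info)
{
	__icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
}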
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 9f69411251d0..737808e27f8b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -203,7 +203,6 @@ EXPORT_SYMBOL(inet_frag_rbtree_purge);
void inet_frag_destroy(struct inet_frag_queue *q)
{
- struct sk_buff *fp;
struct netns_frags *nf;
unsigned int sum, sum_truesize = 0;
struct inet_frags *f;
@@ -212,20 +211,9 @@ void inet_frag_destroy(struct inet_frag_queue *q)
WARN_ON(del_timer(&q->timer) != 0);
/* Release all fragment data. */
- fp = q->fragments;
nf = q->net;
f = nf->f;
- if (fp) {
- do {
- struct sk_buff *xp = fp->next;
-
- sum_truesize += fp->truesize;
- kfree_skb(fp);
- fp = xp;
- } while (fp);
- } else {
- sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
- }
+ sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
sum = sum_truesize + f->qsize;
call_rcu(&q->rcu, inet_frag_destroy_rcu);
@@ -489,26 +477,20 @@ EXPORT_SYMBOL(inet_frag_reasm_finish);
struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
{
- struct sk_buff *head;
+ struct sk_buff *head, *skb;
- if (q->fragments) {
- head = q->fragments;
- q->fragments = head->next;
- } else {
- struct sk_buff *skb;
+ head = skb_rb_first(&q->rb_fragments);
+ if (!head)
+ return NULL;
+ skb = FRAG_CB(head)->next_frag;
+ if (skb)
+ rb_replace_node(&head->rbnode, &skb->rbnode,
+ &q->rb_fragments);
+ else
+ rb_erase(&head->rbnode, &q->rb_fragments);
+ memset(&head->rbnode, 0, sizeof(head->rbnode));
+ barrier();
- head = skb_rb_first(&q->rb_fragments);
- if (!head)
- return NULL;
- skb = FRAG_CB(head)->next_frag;
- if (skb)
- rb_replace_node(&head->rbnode, &skb->rbnode,
- &q->rb_fragments);
- else
- rb_erase(&head->rbnode, &q->rb_fragments);
- memset(&head->rbnode, 0, sizeof(head->rbnode));
- barrier();
- }
if (head == q->fragments_tail)
q->fragments_tail = NULL;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 486ecb0aeb87..cf2b0a6a3337 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -261,7 +261,6 @@ static int ip_frag_reinit(struct ipq *qp)
qp->q.flags = 0;
qp->q.len = 0;
qp->q.meat = 0;
- qp->q.fragments = NULL;
qp->q.rb_fragments = RB_ROOT;
qp->q.fragments_tail = NULL;
qp->q.last_run_head = NULL;
@@ -451,7 +450,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
ip_send_check(iph);
__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
- qp->q.fragments = NULL;
qp->q.rb_fragments = RB_ROOT;
qp->q.fragments_tail = NULL;
qp->q.last_run_head = NULL;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0f4925a0d6b2..fd219f7bd3ea 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -578,7 +578,7 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
key = &info->key;
ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
tunnel_id_to_key32(key->tun_id), key->tos, 0,
- skb->mark);
+ skb->mark, skb_get_hash(skb));
rt = ip_route_output_key(dev_net(dev), &fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index cd6b5694f99e..ecce2dc78f17 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -307,11 +307,10 @@ drop:
}
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
- struct sk_buff *skb)
+ struct sk_buff *skb, struct net_device *dev)
{
const struct iphdr *iph = ip_hdr(skb);
int (*edemux)(struct sk_buff *skb);
- struct net_device *dev = skb->dev;
struct rtable *rt;
int err;
@@ -400,6 +399,7 @@ drop_error:
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ struct net_device *dev = skb->dev;
int ret;
/* if ingress device is enslaved to an L3 master device pass the
@@ -409,7 +409,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
if (!skb)
return NET_RX_SUCCESS;
- ret = ip_rcv_finish_core(net, sk, skb);
+ ret = ip_rcv_finish_core(net, sk, skb, dev);
if (ret != NET_RX_DROP)
ret = dst_input(skb);
return ret;
@@ -545,6 +545,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
+ struct net_device *dev = skb->dev;
struct dst_entry *dst;
skb_list_del_init(skb);
@@ -554,7 +555,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
skb = l3mdev_ip_rcv(skb);
if (!skb)
continue;
- if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP)
+ if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
continue;
dst = skb_dst(skb);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ed194d46c00e..32a35043c9f5 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -251,8 +251,9 @@ static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb)
* If opt == NULL, then skb->data should point to IP header.
*/
-int ip_options_compile(struct net *net,
- struct ip_options *opt, struct sk_buff *skb)
+int __ip_options_compile(struct net *net,
+ struct ip_options *opt, struct sk_buff *skb,
+ __be32 *info)
{
__be32 spec_dst = htonl(INADDR_ANY);
unsigned char *pp_ptr = NULL;
@@ -468,11 +469,22 @@ eol:
return 0;
error:
- if (skb) {
- icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24));
- }
+ if (info)
+ *info = htonl((pp_ptr-iph)<<24);
return -EINVAL;
}
+
+int ip_options_compile(struct net *net,
+ struct ip_options *opt, struct sk_buff *skb)
+{
+ int ret;
+ __be32 info;
+
+ ret = __ip_options_compile(net, opt, skb, &info);
+ if (ret != 0 && skb)
+ icmp_send(skb, ICMP_PARAMETERPROB, 0, info);
+ return ret;
+}
EXPORT_SYMBOL(ip_options_compile);
/*
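The split gives callers a quiet variant: __ip_options_compile() reports the failing option's offset through *info (which may be NULL) instead of emitting ICMP, while the ip_options_compile() wrapper keeps the old send-ICMP_PARAMETERPROB behaviour. A hedged usage sketch, with parse_options_quietly() a hypothetical caller:

static int parse_options_quietly(struct net *net, struct ip_options *opt,
				 struct sk_buff *skb)
{
	__be32 info;	/* on failure: htonl(offset << 24), the pointer
			 * field an ICMP_PARAMETERPROB would carry */

	return __ip_options_compile(net, opt, skb, &info);
}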
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 2973067b831d..2756fb725bf0 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -310,7 +310,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key,
RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark);
+ tunnel->fwmark, 0);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -584,7 +584,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
- 0, skb->mark);
+ 0, skb->mark, skb_get_hash(skb));
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
@@ -744,7 +744,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ tunnel->fwmark, skb_get_hash(skb));
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
diff --git a/net/ipv4/netlink.c b/net/ipv4/netlink.c
index f86bb4f06609..d8e3a1fb8e82 100644
--- a/net/ipv4/netlink.c
+++ b/net/ipv4/netlink.c
@@ -3,9 +3,10 @@
#include <linux/types.h>
#include <net/net_namespace.h>
#include <net/netlink.h>
+#include <linux/in6.h>
#include <net/ip.h>
-int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family,
struct netlink_ext_ack *extack)
{
*ip_proto = nla_get_u8(attr);
@@ -13,11 +14,19 @@ int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
switch (*ip_proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
+ return 0;
case IPPROTO_ICMP:
+ if (family != AF_INET)
+ break;
+ return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ICMPV6:
+ if (family != AF_INET6)
+ break;
return 0;
- default:
- NL_SET_ERR_MSG(extack, "Unsupported ip proto");
- return -EOPNOTSUPP;
+#endif
}
+ NL_SET_ERR_MSG(extack, "Unsupported ip proto");
+ return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto);
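After the signature change the attribute is validated per address family. A summary of the accepted combinations as the restructured switch now reads, plus a usage sketch (attr and extack assumed from the caller):

/*   IPPROTO_TCP      any family
 *   IPPROTO_UDP      any family
 *   IPPROTO_ICMP     AF_INET only
 *   IPPROTO_ICMPV6   AF_INET6 only, and only with CONFIG_IPV6
 *   anything else    -EOPNOTSUPP, "Unsupported ip proto"
 */
u8 ip_proto;
int err = rtm_getroute_parse_ip_proto(attr, &ip_proto, AF_INET6, extack);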
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ecc12a768191..738ff0a1a048 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1820,6 +1820,7 @@ out:
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys)
{
+ u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
struct flow_keys hash_keys;
u32 mhash;
@@ -1870,6 +1871,9 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
}
mhash = flow_hash_from_keys(&hash_keys);
+ if (multipath_hash)
+ mhash = jhash_2words(mhash, multipath_hash, 0);
+
return mhash >> 1;
}
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -2872,7 +2876,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (tb[RTA_IP_PROTO]) {
err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
- &ip_proto, extack);
+ &ip_proto, AF_INET, extack);
if (err)
return err;
}
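The route.c hunks fold a hash precomputed from an inner packet into the multipath hash rather than replacing it; the new final argument of ip_tunnel_init_flow() (filled with skb_get_hash() in the ip_gre.c and ip_tunnel.c hunks) presumably lands in fl4->flowi4_multipath_hash, read at the top of fib_multipath_hash(). A minimal sketch of just the mixing step:

#include <linux/jhash.h>

static u32 mix_multipath_hash(u32 mhash, u32 inner_hash)
{
	if (inner_hash)
		mhash = jhash_2words(mhash, inner_hash, 0);
	return mhash >> 1;	/* fib_multipath_hash() returns 31 bits */
}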
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 769508c75dce..ad07dd71063d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1412,7 +1412,8 @@ do_fault:
/* It is the one place in all of TCP, except connection
* reset, where we can be unlinking the send_head.
*/
- tcp_check_send_head(sk, skb);
+ if (tcp_write_queue_empty(sk))
+ tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
sk_wmem_free_skb(sk, skb);
}
@@ -3698,7 +3699,7 @@ bool tcp_alloc_md5sig_pool(void)
if (!tcp_md5sig_pool_populated) {
__tcp_alloc_md5sig_pool();
if (tcp_md5sig_pool_populated)
- static_key_slow_inc(&tcp_md5_needed);
+ static_branch_inc(&tcp_md5_needed);
}
mutex_unlock(&tcp_md5sig_mutex);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4010ae3644f3..831d844a27ca 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -973,7 +973,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
* We need to maintain these in the sk structure.
*/
-struct static_key tcp_md5_needed __read_mostly;
+DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
EXPORT_SYMBOL(tcp_md5_needed);
/* Find the Key structure for an address. */
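tcp_md5_needed moves from the raw struct static_key API (static_key_false / static_key_slow_inc, removed in the tcp.c and tcp_output.c hunks) to the managed static-branch API. A minimal sketch of the pattern, with feature_needed a hypothetical key:

#include <linux/jump_label.h>

DEFINE_STATIC_KEY_FALSE(feature_needed);

static void slow_path(void) { /* e.g. an MD5 key lookup */ }

static void hot_path(void)
{
	/* a patched-out no-op branch until the first
	 * static_branch_inc() rewrites every call site */
	if (static_branch_unlikely(&feature_needed))
		slow_path();
}

static void enable_feature(void)
{
	static_branch_inc(&feature_needed);
}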
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 182595e2d40f..79900f783e0d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -294,12 +294,15 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
* so the timewait ack generating code has the key.
*/
do {
- struct tcp_md5sig_key *key;
tcptw->tw_md5_key = NULL;
- key = tp->af_specific->md5_lookup(sk, sk);
- if (key) {
- tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
- BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
+ if (static_branch_unlikely(&tcp_md5_needed)) {
+ struct tcp_md5sig_key *key;
+
+ key = tp->af_specific->md5_lookup(sk, sk);
+ if (key) {
+ tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
+ BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
+ }
}
} while (0);
#endif
@@ -338,10 +341,12 @@ EXPORT_SYMBOL(tcp_time_wait);
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
- struct tcp_timewait_sock *twsk = tcp_twsk(sk);
+ if (static_branch_unlikely(&tcp_md5_needed)) {
+ struct tcp_timewait_sock *twsk = tcp_twsk(sk);
- if (twsk->tw_md5_key)
- kfree_rcu(twsk->tw_md5_key, rcu);
+ if (twsk->tw_md5_key)
+ kfree_rcu(twsk->tw_md5_key, rcu);
+ }
#endif
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e72aa0ff5785..4522579aaca2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -597,7 +597,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
*md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- if (static_key_false(&tcp_md5_needed) &&
+ if (static_branch_unlikely(&tcp_md5_needed) &&
rcu_access_pointer(tp->md5sig_info)) {
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
@@ -734,7 +734,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
*md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- if (static_key_false(&tcp_md5_needed) &&
+ if (static_branch_unlikely(&tcp_md5_needed) &&
rcu_access_pointer(tp->md5sig_info)) {
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
@@ -1846,17 +1846,17 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
* know that all the data is in scatter-gather pages, and that the
* packet has never been sent out before (and thus is not cloned).
*/
-static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
- struct sk_buff *skb, unsigned int len,
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
unsigned int mss_now, gfp_t gfp)
{
- struct sk_buff *buff;
int nlen = skb->len - len;
+ struct sk_buff *buff;
u8 flags;
/* All of a TSO frame must be composed of paged data. */
if (skb->len != skb->data_len)
- return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
+ return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+ skb, len, mss_now, gfp);
buff = sk_stream_alloc_skb(sk, 0, gfp, true);
if (unlikely(!buff))
@@ -1892,7 +1892,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
/* Link BUFF into the send queue. */
__skb_header_release(buff);
- tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
+ tcp_insert_write_queue_after(skb, buff, sk, TCP_FRAG_IN_WRITE_QUEUE);
return 0;
}
@@ -2391,8 +2391,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
nonagle);
if (skb->len > limit &&
- unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
- skb, limit, mss_now, gfp)))
+ unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
break;
if (tcp_small_queue_check(sk, skb, 0))