aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_lookup.h7
-rw-r--r--net/ipv4/fib_semantics.c6
-rw-r--r--net/ipv4/fib_trie.c22
-rw-r--r--net/ipv4/ip_output.c26
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/Kconfig4
-rw-r--r--net/ipv4/ping.c12
-rw-r--r--net/ipv4/raw.c5
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/tcp.c31
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c2
13 files changed, 84 insertions, 42 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4d61ddd8a0ec..85117b45216d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -436,6 +436,9 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (net->ipv4.fib_has_custom_local_routes ||
fib4_has_custom_rules(net))
goto full_check;
+ /* Within the same container, it is regarded as a martian source,
+ * and the same host but different containers are not.
+ */
if (inet_lookup_ifaddr_rcu(net, src))
return -EINVAL;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index e184bcb19943..78e40ea42e58 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -16,10 +16,9 @@ struct fib_alias {
u8 fa_slen;
u32 tb_id;
s16 fa_default;
- u8 offload:1,
- trap:1,
- offload_failed:1,
- unused:5;
+ u8 offload;
+ u8 trap;
+ u8 offload_failed;
struct rcu_head rcu;
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b4589861b84c..2dd375f7407b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -525,9 +525,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.dst_len = dst_len;
fri.tos = fa->fa_tos;
fri.type = fa->fa_type;
- fri.offload = fa->offload;
- fri.trap = fa->trap;
- fri.offload_failed = fa->offload_failed;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed = READ_ONCE(fa->offload_failed);
err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8060524f4256..f7f74d5c14da 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1047,19 +1047,23 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
if (!fa_match)
goto out;
- if (fa_match->offload == fri->offload && fa_match->trap == fri->trap &&
- fa_match->offload_failed == fri->offload_failed)
+ /* These are paired with the WRITE_ONCE() happening in this function.
+ * The reason is that we are only protected by RCU at this point.
+ */
+ if (READ_ONCE(fa_match->offload) == fri->offload &&
+ READ_ONCE(fa_match->trap) == fri->trap &&
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
- fa_match->offload = fri->offload;
- fa_match->trap = fri->trap;
+ WRITE_ONCE(fa_match->offload, fri->offload);
+ WRITE_ONCE(fa_match->trap, fri->trap);
/* 2 means send notifications only if offload_failed was changed. */
if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
- fa_match->offload_failed == fri->offload_failed)
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
- fa_match->offload_failed = fri->offload_failed;
+ WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
if (!net->ipv4.sysctl_fib_notify_on_flag_change)
goto out;
@@ -2297,9 +2301,9 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.dst_len = KEYLENGTH - fa->fa_slen;
fri.tos = fa->fa_tos;
fri.type = fa->fa_type;
- fri.offload = fa->offload;
- fri.trap = fa->trap;
- fri.offload_failed = fa->offload_failed;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed = READ_ONCE(fa->offload_failed);
err = fib_dump_info(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 57c1d8431386..139cec29ed06 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -162,12 +162,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- if (ip_dont_fragment(sk, &rt->dst)) {
+ /* Do not bother generating IPID for small packets (eg SYNACK) */
+ if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) {
iph->frag_off = htons(IP_DF);
iph->id = 0;
} else {
iph->frag_off = 0;
- __ip_select_ident(net, iph, 1);
+ /* TCP packets here are SYNACK with fat IPv4/TCP options.
+ * Avoid using the hashed IP ident generator.
+ */
+ if (sk->sk_protocol == IPPROTO_TCP)
+ iph->id = (__force __be16)prandom_u32();
+ else
+ __ip_select_ident(net, iph, 1);
}
if (opt && opt->opt.optlen) {
@@ -825,15 +832,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
/* Everything is OK. Generate! */
ip_fraglist_init(skb, iph, hlen, &iter);
- if (iter.frag)
- ip_options_fragment(iter.frag);
-
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
if (iter.frag) {
+ bool first_frag = (iter.offset == 0);
+
IPCB(iter.frag)->flags = IPCB(skb)->flags;
ip_fraglist_prepare(skb, &iter);
+ if (first_frag && IPCB(skb)->opt.optlen) {
+ /* ipcb->opt is not populated for frags
+ * coming from __ip_make_skb(),
+ * ip_options_fragment() needs optlen
+ */
+ IPCB(iter.frag)->opt.optlen =
+ IPCB(skb)->opt.optlen;
+ ip_options_fragment(iter.frag);
+ ip_send_check(iter.iph);
+ }
}
skb->tstamp = tstamp;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 07274619b9ea..29bbe2b08ae9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -256,7 +256,9 @@ static int __net_init ipmr_rules_init(struct net *net)
return 0;
err2:
+ rtnl_lock();
ipmr_free_table(mrt);
+ rtnl_unlock();
err1:
fib_rules_unregister(ops);
return err;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 67087f95579f..aab384126f61 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -58,10 +58,6 @@ config NF_TABLES_ARP
endif # NF_TABLES
-config NF_FLOW_TABLE_IPV4
- tristate
- select NF_FLOW_TABLE_INET
-
config NF_DUP_IPV4
tristate "Netfilter IPv4 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 0e56df3a45e2..3a5994b50571 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -172,16 +172,23 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
struct sock *sk = NULL;
struct inet_sock *isk;
struct hlist_nulls_node *hnode;
- int dif = skb->dev->ifindex;
+ int dif, sdif;
if (skb->protocol == htons(ETH_P_IP)) {
+ dif = inet_iif(skb);
+ sdif = inet_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
(int)ident, &ip_hdr(skb)->daddr, dif);
#if IS_ENABLED(CONFIG_IPV6)
} else if (skb->protocol == htons(ETH_P_IPV6)) {
+ dif = inet6_iif(skb);
+ sdif = inet6_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
(int)ident, &ipv6_hdr(skb)->daddr, dif);
#endif
+ } else {
+ pr_err("ping: protocol(%x) is not supported\n", ntohs(skb->protocol));
+ return NULL;
}
read_lock_bh(&ping_table.lock);
@@ -220,7 +227,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
continue;
}
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif)
continue;
sock_hold(sk);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9eb5fc247868..9f97b9cbf7b3 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -722,6 +722,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int ret = -EINVAL;
int chk_addr_ret;
+ lock_sock(sk);
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
goto out;
@@ -741,7 +742,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
sk_dst_reset(sk);
ret = 0;
-out: return ret;
+out:
+ release_sock(sk);
+ return ret;
}
/*
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ff6f91cdb6c4..f33ad1f383b6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3395,8 +3395,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fa->fa_tos == fri.tos &&
fa->fa_info == res.fi &&
fa->fa_type == fri.type) {
- fri.offload = fa->offload;
- fri.trap = fa->trap;
+ fri.offload = READ_ONCE(fa->offload);
+ fri.trap = READ_ONCE(fa->trap);
break;
}
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3b75836db19b..02cb275e5487 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -842,6 +842,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
release_sock(sk);
+ sk_defer_free_flush(sk);
if (spliced)
return spliced;
@@ -936,6 +937,22 @@ void tcp_remove_empty_skb(struct sock *sk)
}
}
+/* skb changing from pure zc to mixed, must charge zc */
+static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
+{
+ if (unlikely(skb_zcopy_pure(skb))) {
+ u32 extra = skb->truesize -
+ SKB_TRUESIZE(skb_end_offset(skb));
+
+ if (!sk_wmem_schedule(sk, extra))
+ return -ENOMEM;
+
+ sk_mem_charge(sk, extra);
+ skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+ }
+ return 0;
+}
+
static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
struct page *page, int offset, size_t *size)
{
@@ -971,7 +988,7 @@ new_segment:
tcp_mark_push(tp, skb);
goto new_segment;
}
- if (!sk_wmem_schedule(sk, copy))
+ if (tcp_downgrade_zcopy_pure(sk, skb) || !sk_wmem_schedule(sk, copy))
return NULL;
if (can_coalesce) {
@@ -1319,16 +1336,8 @@ new_segment:
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- /* skb changing from pure zc to mixed, must charge zc */
- if (unlikely(skb_zcopy_pure(skb))) {
- if (!sk_wmem_schedule(sk, skb->data_len))
- goto wait_for_space;
-
- sk_mem_charge(sk, skb->data_len);
- skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
- }
-
- if (!sk_wmem_schedule(sk, copy))
+ if (tcp_downgrade_zcopy_pure(sk, skb) ||
+ !sk_wmem_schedule(sk, copy))
goto wait_for_space;
err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index dc49a3d551eb..bfe4112e000c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1660,6 +1660,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
(mss != tcp_skb_seglen(skb)))
goto out;
+ if (!tcp_skb_can_collapse(prev, skb))
+ goto out;
len = skb->len;
pcount = tcp_skb_pcount(skb);
if (tcp_skb_shift(prev, skb, pcount, len))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b53476e78c84..fec656f5a39e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2095,7 +2095,7 @@ process:
nf_reset_ct(skb);
if (tcp_filter(sk, skb)) {
- drop_reason = SKB_DROP_REASON_TCP_FILTER;
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto discard_and_relse;
}
th = (const struct tcphdr *)skb->data;