aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--net/ipv4/cipso_ipv4.c12
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fou.c1
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c3
-rw-r--r--net/ipv4/syncookies.c1
-rw-r--r--net/ipv4/tcp_bbr.c49
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/ipv4/udp.c40
11 files changed, 90 insertions, 44 deletions
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index c4c6e1969ed0..2ae8f54cb321 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1523,9 +1523,17 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
int taglen;
for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) {
- if (optptr[0] == IPOPT_CIPSO)
+ switch (optptr[0]) {
+ case IPOPT_CIPSO:
return optptr;
- taglen = optptr[1];
+ case IPOPT_END:
+ return NULL;
+ case IPOPT_NOOP:
+ taglen = 1;
+ break;
+ default:
+ taglen = optptr[1];
+ }
optlen -= taglen;
optptr += taglen;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4e678fa892dd..044d2a159a3c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1334,13 +1334,14 @@ static struct pernet_operations fib_net_ops = {
void __init ip_fib_init(void)
{
- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+ fib_trie_init();
register_pernet_subsys(&fib_net_ops);
+
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
- fib_trie_init();
+ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 222100103808..b8d18171cca3 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1452,7 +1452,7 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
&info.info);
case FIB_EVENT_NH_DEL:
- if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
(fib_nh->nh_flags & RTNH_F_DEAD))
return call_fib_notifiers(dev_net(fib_nh->nh_dev),
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 8e0257d01200..1540db65241a 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -450,6 +450,7 @@ out_unlock:
out:
NAPI_GRO_CB(skb)->flush |= flush;
skb_gro_remcsum_cleanup(skb, &grc);
+ skb->remcsum_offload = 0;
return pp;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7eb252dcecee..50c74cd890bc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -599,6 +599,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
hlen = iph->ihl * 4;
mtu = mtu - hlen; /* Size of data space */
IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
+ ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
/* When frag_list is given, use it. First, check its validity:
* some transformers could create wrong frag_list or break existing
@@ -614,14 +615,15 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (first_len - hlen > mtu ||
((first_len - hlen) & 7) ||
ip_is_fragment(iph) ||
- skb_cloned(skb))
+ skb_cloned(skb) ||
+ skb_headroom(skb) < ll_rs)
goto slow_path;
skb_walk_frags(skb, frag) {
/* Correct geometry. */
if (frag->len > mtu ||
((frag->len & 7) && frag->next) ||
- skb_headroom(frag) < hlen)
+ skb_headroom(frag) < hlen + ll_rs)
goto slow_path_clean;
/* Partially cloned skb? */
@@ -711,8 +713,6 @@ slow_path:
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
- ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
-
/*
* Fragment the datagram.
*/
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 805c8ddfe860..4bbc273b45e8 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -72,8 +72,7 @@ static const struct nf_chain_type filter_arp = {
.family = NFPROTO_ARP,
.owner = THIS_MODULE,
.hook_mask = (1 << NF_ARP_IN) |
- (1 << NF_ARP_OUT) |
- (1 << NF_ARP_FORWARD),
+ (1 << NF_ARP_OUT),
};
static int __init nf_tables_arp_init(void)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 0905cf04c2a4..03ad8778c395 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -335,6 +335,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
treq->ts_off = 0;
+ treq->txhash = net_tx_rndhash();
req->mss = mss;
ireq->ir_num = ntohs(th->dest);
ireq->ir_rmt_port = th->source;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index dbcc9352a48f..69ee877574d0 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -112,7 +112,8 @@ struct bbr {
cwnd_gain:10, /* current gain for setting cwnd */
full_bw_cnt:3, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
- unused_b:6;
+ has_seen_rtt:1, /* have we seen an RTT sample yet? */
+ unused_b:5;
u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
u32 full_bw; /* recent bw, to estimate if pipe is full */
};
@@ -211,6 +212,35 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
return rate >> BW_SCALE;
}
+/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
+static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+{
+ u64 rate = bw;
+
+ rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+ rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+ return rate;
+}
+
+/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
+static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u64 bw;
+ u32 rtt_us;
+
+ if (tp->srtt_us) { /* any RTT sample yet? */
+ rtt_us = max(tp->srtt_us >> 3, 1U);
+ bbr->has_seen_rtt = 1;
+ } else { /* no RTT sample yet */
+ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
+ }
+ bw = (u64)tp->snd_cwnd * BW_UNIT;
+ do_div(bw, rtt_us);
+ sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+}
+
/* Pace using current bw estimate and a gain factor. In order to help drive the
* network toward lower queues while maintaining high utilization and low
* latency, the average pacing rate aims to be slightly (~1%) lower than the
@@ -220,12 +250,13 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
*/
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u64 rate = bw;
+ u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
- rate = bbr_rate_bytes_per_sec(sk, rate, gain);
- rate = min_t(u64, rate, sk->sk_max_pacing_rate);
- if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
+ if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
+ bbr_init_pacing_rate_from_rtt(sk);
+ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
sk->sk_pacing_rate = rate;
}
@@ -798,7 +829,6 @@ static void bbr_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u64 bw;
bbr->prior_cwnd = 0;
bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
@@ -814,11 +844,8 @@ static void bbr_init(struct sock *sk)
minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
- /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
- bw = (u64)tp->snd_cwnd * BW_UNIT;
- do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
- sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */
- bbr_set_pacing_rate(sk, bw, bbr_high_gain);
+ bbr->has_seen_rtt = 0;
+ bbr_init_pacing_rate_from_rtt(sk);
bbr->restore_cwnd = 0;
bbr->round_start = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2920e0cb09f8..dad026fcfd09 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2520,8 +2520,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
return;
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
- (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+ (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
tp->snd_cwnd = tp->snd_ssthresh;
tp->snd_cwnd_stamp = tcp_jiffies32;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4e985dea1dd2..2f1588bf73da 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2202,9 +2202,10 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
{
const u32 now = tcp_jiffies32;
+ enum tcp_chrono old = tp->chrono_type;
- if (tp->chrono_type > TCP_CHRONO_UNSPEC)
- tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
+ if (old > TCP_CHRONO_UNSPEC)
+ tp->chrono_stat[old - 1] += now - tp->chrono_start;
tp->chrono_start = now;
tp->chrono_type = new;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 25294d43e147..e6276fa3750b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1163,34 +1163,32 @@ out:
return ret;
}
-#if BITS_PER_LONG == 64
+#define UDP_SKB_IS_STATELESS 0x80000000
+
static void udp_set_dev_scratch(struct sk_buff *skb)
{
- struct udp_dev_scratch *scratch;
+ struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long));
- scratch = (struct udp_dev_scratch *)&skb->dev_scratch;
- scratch->truesize = skb->truesize;
+ scratch->_tsize_state = skb->truesize;
+#if BITS_PER_LONG == 64
scratch->len = skb->len;
scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
scratch->is_linear = !skb_is_nonlinear(skb);
+#endif
+ if (likely(!skb->_skb_refdst))
+ scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
}
static int udp_skb_truesize(struct sk_buff *skb)
{
- return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize;
-}
-#else
-static void udp_set_dev_scratch(struct sk_buff *skb)
-{
- skb->dev_scratch = skb->truesize;
+ return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
}
-static int udp_skb_truesize(struct sk_buff *skb)
+static bool udp_skb_has_head_state(struct sk_buff *skb)
{
- return skb->dev_scratch;
+ return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS);
}
-#endif
/* fully reclaim rmem/fwd memory allocated for skb */
static void udp_rmem_release(struct sock *sk, int size, int partial,
@@ -1388,6 +1386,11 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
unlock_sock_fast(sk, slow);
}
+ /* In the more common cases we cleared the head states previously,
+ * see __udp_queue_rcv_skb().
+ */
+ if (unlikely(udp_skb_has_head_state(skb)))
+ skb_release_head_state(skb);
consume_stateless_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_consume_udp);
@@ -1779,8 +1782,12 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk_mark_napi_id_once(sk, skb);
}
- /* clear all pending head states while they are hot in the cache */
- skb_release_head_state(skb);
+ /* At recvmsg() time we may access skb->dst or skb->sp depending on
+ * the IP options and the cmsg flags, elsewhere can we clear all
+ * pending head states while they are hot in the cache
+ */
+ if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb)))
+ skb_release_head_state(skb);
rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
@@ -1921,7 +1928,7 @@ drop:
/* For TCP sockets, sk_rx_dst is protected by socket lock
* For UDP, we use xchg() to guard against concurrent changes.
*/
-static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
{
struct dst_entry *old;
@@ -1930,6 +1937,7 @@ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
dst_release(old);
}
}
+EXPORT_SYMBOL(udp_sk_rx_dst_set);
/*
* Multicasts and broadcasts go to each listener.