aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c7
-rw-r--r--net/ipv4/cipso_ipv4.c12
-rw-r--r--net/ipv4/esp4.c13
-rw-r--r--net/ipv4/esp4_offload.c2
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fib_semantics.c14
-rw-r--r--net/ipv4/fou.c1
-rw-r--r--net/ipv4/igmp.c16
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/route.c19
-rw-r--r--net/ipv4/tcp_input.c37
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_output.c32
-rw-r--r--net/ipv4/tcp_timer.c3
-rw-r--r--net/ipv4/tcp_ulp.c14
-rw-r--r--net/ipv4/udp.c46
-rw-r--r--net/ipv4/udp_offload.c2
17 files changed, 134 insertions, 105 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 76c2077c3f5b..2e548eca3489 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1731,6 +1731,13 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif
+ /* Some igmp sysctl, whose values are always used */
+ net->ipv4.sysctl_igmp_max_memberships = 20;
+ net->ipv4.sysctl_igmp_max_msf = 10;
+ /* IGMP reports for link-local multicast groups are enabled by default */
+ net->ipv4.sysctl_igmp_llm_reports = 1;
+ net->ipv4.sysctl_igmp_qrv = 2;
+
return 0;
}
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index c4c6e1969ed0..2ae8f54cb321 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1523,9 +1523,17 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
int taglen;
for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) {
- if (optptr[0] == IPOPT_CIPSO)
+ switch (optptr[0]) {
+ case IPOPT_CIPSO:
return optptr;
- taglen = optptr[1];
+ case IPOPT_END:
+ return NULL;
+ case IPOPT_NOOP:
+ taglen = 1;
+ break;
+ default:
+ taglen = optptr[1];
+ }
optlen -= taglen;
optptr += taglen;
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 0cbee0a666ff..dbb31a942dfa 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -381,7 +381,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
if (unlikely(err < 0))
- goto error;
+ goto error_free;
if (!esp->inplace) {
int allocsize;
@@ -392,7 +392,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
spin_lock_bh(&x->lock);
if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
spin_unlock_bh(&x->lock);
- goto error;
+ goto error_free;
}
skb_shinfo(skb)->nr_frags = 1;
@@ -409,7 +409,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
if (unlikely(err < 0))
- goto error;
+ goto error_free;
}
if ((x->props.flags & XFRM_STATE_ESN))
@@ -442,8 +442,9 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
if (sg != dsg)
esp_ssg_unref(x, tmp);
- kfree(tmp);
+error_free:
+ kfree(tmp);
error:
return err;
}
@@ -695,8 +696,10 @@ skip_cow:
sg_init_table(sg, nfrags);
err = skb_to_sgvec(skb, sg, 0, skb->len);
- if (unlikely(err < 0))
+ if (unlikely(err < 0)) {
+ kfree(tmp);
goto out;
+ }
skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index e0666016a764..50112324fa5c 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -257,7 +257,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
err = esp_output_tail(x, skb, &esp);
- if (err < 0)
+ if (err)
return err;
secpath_reset(skb);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4e678fa892dd..044d2a159a3c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1334,13 +1334,14 @@ static struct pernet_operations fib_net_ops = {
void __init ip_fib_init(void)
{
- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+ fib_trie_init();
register_pernet_subsys(&fib_net_ops);
+
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
- fib_trie_init();
+ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 222100103808..ec3a9ce281a6 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1083,15 +1083,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (!fi)
goto failure;
- fib_info_cnt++;
if (cfg->fc_mx) {
fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
- if (!fi->fib_metrics)
- goto failure;
+ if (unlikely(!fi->fib_metrics)) {
+ kfree(fi);
+ return ERR_PTR(err);
+ }
atomic_set(&fi->fib_metrics->refcnt, 1);
- } else
+ } else {
fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
-
+ }
+ fib_info_cnt++;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
fi->fib_scope = cfg->fc_scope;
@@ -1452,7 +1454,7 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
&info.info);
case FIB_EVENT_NH_DEL:
- if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
(fib_nh->nh_flags & RTNH_F_DEAD))
return call_fib_notifiers(dev_net(fib_nh->nh_dev),
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 8e0257d01200..1540db65241a 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -450,6 +450,7 @@ out_unlock:
out:
NAPI_GRO_CB(skb)->flush |= flush;
skb_gro_remcsum_cleanup(skb, &grc);
+ skb->remcsum_offload = 0;
return pp;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 28f14afd0dd3..caf2f1101d02 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1007,10 +1007,18 @@ int igmp_rcv(struct sk_buff *skb)
{
/* This basically follows the spec line by line -- see RFC1112 */
struct igmphdr *ih;
- struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+ struct net_device *dev = skb->dev;
+ struct in_device *in_dev;
int len = skb->len;
bool dropped = true;
+ if (netif_is_l3_master(dev)) {
+ dev = dev_get_by_index_rcu(dev_net(dev), IPCB(skb)->iif);
+ if (!dev)
+ goto drop;
+ }
+
+ in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
goto drop;
@@ -2974,12 +2982,6 @@ static int __net_init igmp_net_init(struct net *net)
goto out_sock;
}
- /* Sysctl initialization */
- net->ipv4.sysctl_igmp_max_memberships = 20;
- net->ipv4.sysctl_igmp_max_msf = 10;
- /* IGMP reports for link-local multicast groups are enabled by default */
- net->ipv4.sysctl_igmp_llm_reports = 1;
- net->ipv4.sysctl_igmp_qrv = 2;
return 0;
out_sock:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 50c74cd890bc..e153c40c2436 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -965,11 +965,12 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
- (skb && skb_is_gso(skb))) &&
+ if ((skb && skb_is_gso(skb)) ||
+ (((length + (skb ? skb->len : fragheaderlen)) > mtu) &&
+ (skb_queue_len(queue) <= 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
- (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
+ (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
maxfraglen, flags);
@@ -1288,6 +1289,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
return -EINVAL;
if ((size + skb->len > mtu) &&
+ (skb_queue_len(&sk->sk_write_queue) == 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO)) {
if (skb->ip_summed != CHECKSUM_PARTIAL)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c816cd53f7fc..2331de20ca50 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1267,7 +1267,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
if (mtu)
return mtu;
- mtu = dst->dev->mtu;
+ mtu = READ_ONCE(dst->dev->mtu);
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
if (rt->rt_uses_gateway && mtu > 576)
@@ -2750,26 +2750,34 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = 0;
if (IS_ERR(rt))
err = PTR_ERR(rt);
+ else
+ skb_dst_set(skb, &rt->dst);
}
if (err)
goto errout_free;
- skb_dst_set(skb, &rt->dst);
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
table_id = rt->rt_table_id;
- if (rtm->rtm_flags & RTM_F_FIB_MATCH)
+ if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
+ if (!res.fi) {
+ err = fib_props[res.type].error;
+ if (!err)
+ err = -EHOSTUNREACH;
+ goto errout_free;
+ }
err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
rt->rt_type, res.prefix, res.prefixlen,
fl4.flowi4_tos, res.fi, 0);
- else
+ } else {
err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
+ }
if (err < 0)
goto errout_free;
@@ -2979,8 +2987,7 @@ static __net_init int rt_genid_init(struct net *net)
{
atomic_set(&net->ipv4.rt_genid, 0);
atomic_set(&net->fnhe_genid, 0);
- get_random_bytes(&net->ipv4.dev_addr_genid,
- sizeof(net->ipv4.dev_addr_genid));
+ atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
return 0;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2920e0cb09f8..bab7f0493098 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -107,6 +107,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
+#define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */
#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
@@ -2520,8 +2521,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
return;
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
- (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+ (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
tp->snd_cwnd = tp->snd_ssthresh;
tp->snd_cwnd_stamp = tcp_jiffies32;
}
@@ -3004,21 +3005,24 @@ void tcp_rearm_rto(struct sock *sk)
/* Offset the time elapsed after installing regular RTO */
if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
- struct sk_buff *skb = tcp_write_queue_head(sk);
- u64 rto_time_stamp = skb->skb_mstamp +
- jiffies_to_usecs(rto);
- s64 delta_us = rto_time_stamp - tp->tcp_mstamp;
+ s64 delta_us = tcp_rto_delta_us(sk);
/* delta_us may not be positive if the socket is locked
* when the retrans timer fires and is rescheduled.
*/
- if (delta_us > 0)
- rto = usecs_to_jiffies(delta_us);
+ rto = usecs_to_jiffies(max_t(int, delta_us, 1));
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
TCP_RTO_MAX);
}
}
+/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
+static void tcp_set_xmit_timer(struct sock *sk)
+{
+ if (!tcp_schedule_loss_probe(sk))
+ tcp_rearm_rto(sk);
+}
+
/* If we get here, the whole TSO packet has not been acked. */
static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
{
@@ -3180,7 +3184,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
ca_rtt_us, sack->rate);
if (flag & FLAG_ACKED) {
- tcp_rearm_rto(sk);
+ flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */
if (unlikely(icsk->icsk_mtup.probe_size &&
!after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
tcp_mtup_probe_success(sk);
@@ -3208,7 +3212,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
* after when the head was last (re)transmitted. Otherwise the
* timeout may continue to extend in loss recovery.
*/
- tcp_rearm_rto(sk);
+ flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */
}
if (icsk->icsk_ca_ops->pkts_acked) {
@@ -3580,9 +3584,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, tp->snd_nxt))
goto invalid_ack;
- if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
- tcp_rearm_rto(sk);
-
if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
icsk->icsk_retransmits = 0;
@@ -3647,18 +3648,20 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
&sack_state);
+ if (tp->tlp_high_seq)
+ tcp_process_tlp_ack(sk, ack, flag);
+ /* If needed, reset TLP/RTO timer; RACK may later override this. */
+ if (flag & FLAG_SET_XMIT_TIMER)
+ tcp_set_xmit_timer(sk);
+
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
}
- if (tp->tlp_high_seq)
- tcp_process_tlp_ack(sk, ack, flag);
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
sk_dst_confirm(sk);
- if (icsk->icsk_pending == ICSK_TIME_RETRANS)
- tcp_schedule_loss_probe(sk);
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */
tcp_rate_gen(sk, delivered, lost, sack_state.rate);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a20e7f03d5f7..e9252c7df809 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1722,6 +1722,8 @@ process:
*/
sock_hold(sk);
refcounted = true;
+ if (tcp_filter(sk, skb))
+ goto discard_and_relse;
nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
@@ -1729,8 +1731,6 @@ process:
}
if (nsk == sk) {
reqsk_put(req);
- } else if (tcp_filter(sk, skb)) {
- goto discard_and_relse;
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v4_send_reset(nsk, skb);
goto discard_and_relse;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4e985dea1dd2..b7661a68d498 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2202,9 +2202,10 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
{
const u32 now = tcp_jiffies32;
+ enum tcp_chrono old = tp->chrono_type;
- if (tp->chrono_type > TCP_CHRONO_UNSPEC)
- tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
+ if (old > TCP_CHRONO_UNSPEC)
+ tp->chrono_stat[old - 1] += now - tp->chrono_start;
tp->chrono_start = now;
tp->chrono_type = new;
}
@@ -2376,24 +2377,15 @@ bool tcp_schedule_loss_probe(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- u32 timeout, tlp_time_stamp, rto_time_stamp;
u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
+ u32 timeout, rto_delta_us;
- /* No consecutive loss probes. */
- if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
- tcp_rearm_rto(sk);
- return false;
- }
/* Don't do any loss probe on a Fast Open connection before 3WHS
* finishes.
*/
if (tp->fastopen_rsk)
return false;
- /* TLP is only scheduled when next timer event is RTO. */
- if (icsk->icsk_pending != ICSK_TIME_RETRANS)
- return false;
-
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
@@ -2416,14 +2408,10 @@ bool tcp_schedule_loss_probe(struct sock *sk)
(rtt + (rtt >> 1) + TCP_DELACK_MAX));
timeout = max_t(u32, timeout, msecs_to_jiffies(10));
- /* If RTO is shorter, just schedule TLP in its place. */
- tlp_time_stamp = tcp_jiffies32 + timeout;
- rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
- if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
- s32 delta = rto_time_stamp - tcp_jiffies32;
- if (delta > 0)
- timeout = delta;
- }
+ /* If the RTO formula yields an earlier time, then use that time. */
+ rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */
+ if (rto_delta_us > 0)
+ timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
TCP_RTO_MAX);
@@ -3448,6 +3436,10 @@ int tcp_connect(struct sock *sk)
int err;
tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
+
+ if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
+ return -EHOSTUNREACH; /* Routing failure or similar. */
+
tcp_connect_init(sk);
if (unlikely(tp->repair)) {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c0feeeef962a..e906014890b6 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -652,7 +652,8 @@ static void tcp_keepalive_timer (unsigned long data)
goto death;
}
- if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
+ if (!sock_flag(sk, SOCK_KEEPOPEN) ||
+ ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
goto out;
elapsed = keepalive_time_when(tp);
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 2417f55374c5..6bb9e14c710a 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -122,14 +122,14 @@ int tcp_set_ulp(struct sock *sk, const char *name)
ulp_ops = __tcp_ulp_find_autoload(name);
if (!ulp_ops)
- err = -ENOENT;
- else
- err = ulp_ops->init(sk);
+ return -ENOENT;
- if (err)
- goto out;
+ err = ulp_ops->init(sk);
+ if (err) {
+ module_put(ulp_ops->owner);
+ return err;
+ }
icsk->icsk_ulp_ops = ulp_ops;
- out:
- return err;
+ return 0;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b057653ceca9..cd1d044a7fa5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -802,7 +802,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
- else if (sk->sk_no_check_tx) { /* UDP csum disabled */
+ else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */
skb->ip_summed = CHECKSUM_NONE;
goto send;
@@ -1163,34 +1163,32 @@ out:
return ret;
}
-#if BITS_PER_LONG == 64
+#define UDP_SKB_IS_STATELESS 0x80000000
+
static void udp_set_dev_scratch(struct sk_buff *skb)
{
- struct udp_dev_scratch *scratch;
+ struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long));
- scratch = (struct udp_dev_scratch *)&skb->dev_scratch;
- scratch->truesize = skb->truesize;
+ scratch->_tsize_state = skb->truesize;
+#if BITS_PER_LONG == 64
scratch->len = skb->len;
scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
scratch->is_linear = !skb_is_nonlinear(skb);
+#endif
+ if (likely(!skb->_skb_refdst))
+ scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
}
static int udp_skb_truesize(struct sk_buff *skb)
{
- return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize;
-}
-#else
-static void udp_set_dev_scratch(struct sk_buff *skb)
-{
- skb->dev_scratch = skb->truesize;
+ return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
}
-static int udp_skb_truesize(struct sk_buff *skb)
+static bool udp_skb_has_head_state(struct sk_buff *skb)
{
- return skb->dev_scratch;
+ return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS);
}
-#endif
/* fully reclaim rmem/fwd memory allocated for skb */
static void udp_rmem_release(struct sock *sk, int size, int partial,
@@ -1388,10 +1386,10 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
unlock_sock_fast(sk, slow);
}
- /* we cleared the head states previously only if the skb lacks any IP
- * options, see __udp_queue_rcv_skb().
+ /* In the more common cases we cleared the head states previously,
+ * see __udp_queue_rcv_skb().
*/
- if (unlikely(IPCB(skb)->opt.optlen > 0))
+ if (unlikely(udp_skb_has_head_state(skb)))
skb_release_head_state(skb);
consume_stateless_skb(skb);
}
@@ -1576,7 +1574,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
return ip_recv_error(sk, msg, len, addr_len);
try_again:
- peeking = off = sk_peek_offset(sk, flags);
+ peeking = flags & MSG_PEEK;
+ off = sk_peek_offset(sk, flags);
skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
if (!skb)
return err;
@@ -1784,11 +1783,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk_mark_napi_id_once(sk, skb);
}
- /* At recvmsg() time we need skb->dst to process IP options-related
- * cmsg, elsewhere can we clear all pending head states while they are
- * hot in the cache
+ /* At recvmsg() time we may access skb->dst or skb->sp depending on
+ * the IP options and the cmsg flags, elsewhere can we clear all
+ * pending head states while they are hot in the cache
*/
- if (likely(IPCB(skb)->opt.optlen == 0))
+ if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb)))
skb_release_head_state(skb);
rc = __udp_enqueue_schedule_skb(sk, skb);
@@ -1930,7 +1929,7 @@ drop:
/* For TCP sockets, sk_rx_dst is protected by socket lock
* For UDP, we use xchg() to guard against concurrent changes.
*/
-static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
{
struct dst_entry *old;
@@ -1939,6 +1938,7 @@ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
dst_release(old);
}
}
+EXPORT_SYMBOL(udp_sk_rx_dst_set);
/*
* Multicasts and broadcasts go to each listener.
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 781250151d40..0932c85b42af 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -235,7 +235,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
- skb->ip_summed = CHECKSUM_NONE;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
/* If there is no outer header we can fake a checksum offload
* due to the fact that we have already done the checksum in