aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c27
-rw-r--r--net/core/devlink.c17
-rw-r--r--net/core/sock.c7
-rw-r--r--net/core/sock_reuseport.c15
-rw-r--r--net/ipv4/inet_connection_sock.c15
-rw-r--r--net/ipv4/ip_gre.c7
-rw-r--r--net/ipv4/ip_output.c32
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv6/ip6_output.c40
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/sit.c4
-rw-r--r--net/mptcp/protocol.c2
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/nf_conntrack_proto.c16
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c14
-rw-r--r--net/netfilter/nf_conntrack_proto_icmp.c7
-rw-r--r--net/netfilter/nf_conntrack_proto_icmpv6.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c23
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c6
-rw-r--r--net/netfilter/nf_tables_core.c1
-rw-r--r--net/netfilter/nfnetlink_hook.c4
-rw-r--r--net/netfilter/nft_exthdr.c7
-rw-r--r--net/netfilter/nft_last.c87
-rw-r--r--net/sched/sch_generic.c23
-rw-r--r--net/sctp/sm_statefuns.c5
-rw-r--r--net/sctp/transport.c11
-rw-r--r--net/smc/smc_tx.c5
-rw-r--r--net/xfrm/xfrm_output.c41
31 files changed, 318 insertions, 113 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 50531a2d0b20..991d09b67bd9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3852,10 +3852,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
+ if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
+ qdisc_run_begin(q)) {
+ /* Retest nolock_qdisc_is_empty() within the protection
+ * of q->seqlock to protect from racing with requeuing.
+ */
+ if (unlikely(!nolock_qdisc_is_empty(q))) {
+ rc = q->enqueue(skb, q, &to_free) &
+ NET_XMIT_MASK;
+ __qdisc_run(q);
+ qdisc_run_end(q);
+
+ goto no_lock_out;
+ }
+
+ qdisc_bstats_cpu_update(q, skb);
+ if (sch_direct_xmit(skb, q, dev, txq, NULL, true) &&
+ !nolock_qdisc_is_empty(q))
+ __qdisc_run(q);
+
+ qdisc_run_end(q);
+ return NET_XMIT_SUCCESS;
+ }
+
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
- if (likely(!netif_xmit_frozen_or_stopped(txq)))
- qdisc_run(q);
+ qdisc_run(q);
+no_lock_out:
if (unlikely(to_free))
kfree_skb_list(to_free);
return rc;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 566ddd147633..8fdd04f00fd7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2709,23 +2709,16 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack)
{
struct devlink_rate *devlink_rate;
- u16 old_mode;
- int err;
-
- if (!devlink->ops->eswitch_mode_get)
- return -EOPNOTSUPP;
- err = devlink->ops->eswitch_mode_get(devlink, &old_mode);
- if (err)
- return err;
-
- if (old_mode == mode)
- return 0;
+ /* Take the lock to sync with devlink_rate_nodes_destroy() */
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_rate, &devlink->rate_list, list)
if (devlink_rate_is_node(devlink_rate)) {
+ mutex_unlock(&devlink->lock);
NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
return -EBUSY;
}
+ mutex_unlock(&devlink->lock);
return 0;
}
@@ -9275,6 +9268,8 @@ void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
mutex_lock(&devlink->lock);
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
+ if (devlink_rate->parent)
+ refcount_dec(&devlink_rate->parent->refcnt);
list_del(&devlink_rate->list);
devlink_port->devlink_rate = NULL;
mutex_unlock(&devlink->lock);
diff --git a/net/core/sock.c b/net/core/sock.c
index ddfa88082a2b..a2337b37eba6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1635,6 +1635,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_bound_dev_if;
break;
+ case SO_NETNS_COOKIE:
+ lv = sizeof(u64);
+ if (len != lv)
+ return -EINVAL;
+ v.val64 = sock_net(sk)->net_cookie;
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index de5ee3ae86d5..3f00a28fe762 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -6,6 +6,7 @@
* selecting the socket index from the array of available sockets.
*/
+#include <net/ip.h>
#include <net/sock_reuseport.h>
#include <linux/bpf.h>
#include <linux/idr.h>
@@ -536,7 +537,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
socks = READ_ONCE(reuse->num_socks);
if (unlikely(!socks))
- goto out;
+ goto failure;
/* paired with smp_wmb() in __reuseport_add_sock() */
smp_rmb();
@@ -546,13 +547,13 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
goto select_by_hash;
- goto out;
+ goto failure;
}
if (!skb) {
skb = alloc_skb(0, GFP_ATOMIC);
if (!skb)
- goto out;
+ goto failure;
allocated = true;
}
@@ -565,12 +566,18 @@ select_by_hash:
if (!nsk)
nsk = reuseport_select_sock_by_hash(reuse, hash, socks);
- if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt)))
+ if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) {
nsk = NULL;
+ goto failure;
+ }
out:
rcu_read_unlock();
return nsk;
+
+failure:
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+ goto out;
}
EXPORT_SYMBOL(reuseport_migrate_sock);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0eea878edc30..754013fa393b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -703,6 +703,8 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req,
nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
if (!nreq) {
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+
/* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
sock_put(sk);
return NULL;
@@ -876,9 +878,10 @@ static void reqsk_timer_handler(struct timer_list *t)
if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
/* delete timer */
inet_csk_reqsk_queue_drop(sk_listener, nreq);
- goto drop;
+ goto no_ownership;
}
+ __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS);
reqsk_migrate_reset(oreq);
reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
reqsk_put(oreq);
@@ -887,17 +890,19 @@ static void reqsk_timer_handler(struct timer_list *t)
return;
}
-drop:
/* Even if we can clone the req, we may need not retransmit any more
* SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
* CPU may win the "own_req" race so that inet_ehash_insert() fails.
*/
if (nreq) {
+ __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE);
+no_ownership:
reqsk_migrate_reset(nreq);
reqsk_queue_removed(queue, nreq);
__reqsk_free(nreq);
}
+drop:
inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
}
@@ -1135,11 +1140,13 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
refcount_set(&nreq->rsk_refcnt, 1);
if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS);
reqsk_migrate_reset(req);
reqsk_put(req);
return child;
}
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
reqsk_migrate_reset(nreq);
__reqsk_free(nreq);
} else if (inet_csk_reqsk_queue_add(sk, req, child)) {
@@ -1188,8 +1195,12 @@ void inet_csk_listen_stop(struct sock *sk)
refcount_set(&nreq->rsk_refcnt, 1);
if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
+ __NET_INC_STATS(sock_net(nsk),
+ LINUX_MIB_TCPMIGRATEREQSUCCESS);
reqsk_migrate_reset(req);
} else {
+ __NET_INC_STATS(sock_net(nsk),
+ LINUX_MIB_TCPMIGRATEREQFAILURE);
reqsk_migrate_reset(nreq);
__reqsk_free(nreq);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a68bf4c6fe9b..12dca0c85f3c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -107,6 +107,8 @@ module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
+static const struct header_ops ipgre_header_ops;
+
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
u32 id, u32 index,
@@ -364,7 +366,10 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
raw_proto, false) < 0)
goto drop;
- if (tunnel->dev->type != ARPHRD_NONE)
+ /* Special case for ipgre_header_parse(), which expects the
+ * mac_header to point to the outer IP header.
+ */
+ if (tunnel->dev->header_ops == &ipgre_header_ops)
skb_pop_mac_header(skb);
else
skb_reset_mac_header(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c3efc7d658f6..8d8a8da3ae7e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1054,7 +1054,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
- unsigned int alloclen;
+ unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
struct sk_buff *skb_prev;
alloc_new_skb:
@@ -1074,35 +1074,39 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
pagedlen = 0;
+ alloc_extra = hh_len + 15;
+ alloc_extra += exthdrlen;
+
+ /* The last fragment gets additional space at tail.
+ * Note, with MSG_MORE we overallocate on fragments,
+ * because we have no idea what fragment will be
+ * the last.
+ */
+ if (datalen == length + fraggap)
+ alloc_extra += rt->dst.trailer_len;
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else if (!paged)
+ else if (!paged &&
+ (fraglen + alloc_extra < SKB_MAX_ALLOC ||
+ !(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
- alloclen += exthdrlen;
-
- /* The last fragment gets additional space at tail.
- * Note, with MSG_MORE we overallocate on fragments,
- * because we have no idea what fragment will be
- * the last.
- */
- if (datalen == length + fraggap)
- alloclen += rt->dst.trailer_len;
+ alloclen += alloc_extra;
if (transhdrlen) {
- skb = sock_alloc_send_skb(sk,
- alloclen + hh_len + 15,
+ skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = alloc_skb(alloclen + hh_len + 15,
+ skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index d5bfa087c23a..266c65577ba6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -242,6 +242,8 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
if (!tun_dst)
return 0;
}
+ skb_reset_mac_header(skb);
+
return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 6d46297a99f8..b0d3a09dc84e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -295,6 +295,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH),
SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS),
SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS),
+ SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS),
+ SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f258a4c0da71..0a4f3f16140a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -786,6 +786,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
+ if (sk != req->rsk_listener)
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+
if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
inet_rsk(req)->acked = 1;
return NULL;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ff4f9ebcf7f6..984050f35c61 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1055,13 +1055,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* ip6_route_output will fail given src=any saddr, though, so
* that's why we try it again later.
*/
- if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
+ if (ipv6_addr_any(&fl6->saddr)) {
struct fib6_info *from;
struct rt6_info *rt;
- bool had_dst = *dst != NULL;
- if (!had_dst)
- *dst = ip6_route_output(net, sk, fl6);
+ *dst = ip6_route_output(net, sk, fl6);
rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
rcu_read_lock();
@@ -1078,7 +1076,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* never existed and let the SA-enabled version take
* over.
*/
- if (!had_dst && (*dst)->error) {
+ if ((*dst)->error) {
dst_release(*dst);
*dst = NULL;
}
@@ -1555,7 +1553,7 @@ emsgsize:
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
- unsigned int alloclen;
+ unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
@@ -1582,17 +1580,28 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
pagedlen = 0;
+ alloc_extra = hh_len;
+ alloc_extra += dst_exthdrlen;
+ alloc_extra += rt->dst.trailer_len;
+
+ /* We just reserve space for fragment header.
+ * Note: this may be overallocation if the message
+ * (without MSG_MORE) fits into the MTU.
+ */
+ alloc_extra += sizeof(struct frag_hdr);
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else if (!paged)
+ else if (!paged &&
+ (fraglen + alloc_extra < SKB_MAX_ALLOC ||
+ !(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
-
- alloclen += dst_exthdrlen;
+ alloclen += alloc_extra;
if (datalen != length + fraggap) {
/*
@@ -1602,30 +1611,21 @@ alloc_new_skb:
datalen += rt->dst.trailer_len;
}
- alloclen += rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
- /*
- * We just reserve space for fragment header.
- * Note: this may be overallocation if the message
- * (without MSG_MORE) fits into the MTU.
- */
- alloclen += sizeof(struct frag_hdr);
-
copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy < 0) {
err = -EINVAL;
goto error;
}
if (transhdrlen) {
- skb = sock_alloc_send_skb(sk,
- alloclen + hh_len,
+ skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = alloc_skb(alloclen + hh_len,
+ skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 288bafded998..0b8a38687ce4 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -837,6 +837,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
} else {
skb->dev = tunnel->dev;
+ skb_reset_mac_header(skb);
}
skb_reset_network_header(skb);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e0a39b0bb4c1..df5bea818410 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -710,6 +710,8 @@ static int ipip6_rcv(struct sk_buff *skb)
* old iph is no longer valid
*/
iph = (const struct iphdr *)skb_mac_header(skb);
+ skb_reset_mac_header(skb);
+
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -780,6 +782,8 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
tpi = &ipip_tpi;
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
+ skb_reset_mac_header(skb);
+
return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index ce0c45dfb79e..7bb82424e551 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -455,7 +455,7 @@ static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
{
const struct inet_connection_sock *icsk = inet_csk(ssk);
- bool ack_pending = READ_ONCE(icsk->icsk_ack.pending);
+ u8 ack_pending = READ_ONCE(icsk->icsk_ack.pending);
const struct tcp_sock *tp = tcp_sk(ssk);
return (ack_pending & ICSK_ACK_SCHED) &&
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 87112dad1fd4..049890e00a3d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -74,7 +74,7 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
- nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \
+ nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o nft_last.o \
nft_chain_route.o nf_tables_offload.o \
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
nft_set_pipapo.o
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index be14e0bea4c8..55647409a9be 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -45,12 +45,13 @@
static DEFINE_MUTEX(nf_ct_proto_mutex);
#ifdef CONFIG_SYSCTL
-__printf(5, 6)
+__printf(4, 5)
void nf_l4proto_log_invalid(const struct sk_buff *skb,
- struct net *net,
- u16 pf, u8 protonum,
+ const struct nf_hook_state *state,
+ u8 protonum,
const char *fmt, ...)
{
+ struct net *net = state->net;
struct va_format vaf;
va_list args;
@@ -62,15 +63,16 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb,
vaf.fmt = fmt;
vaf.va = &args;
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_proto_%d: %pV ", protonum, &vaf);
+ nf_log_packet(net, state->pf, 0, skb, state->in, state->out,
+ NULL, "nf_ct_proto_%d: %pV ", protonum, &vaf);
va_end(args);
}
EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
-__printf(3, 4)
+__printf(4, 5)
void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
const struct nf_conn *ct,
+ const struct nf_hook_state *state,
const char *fmt, ...)
{
struct va_format vaf;
@@ -85,7 +87,7 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
vaf.fmt = fmt;
vaf.va = &args;
- nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
+ nf_l4proto_log_invalid(skb, state,
nf_ct_protonum(ct), "%pV", &vaf);
va_end(args);
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 4f33307fa3cf..c1557d47ccd1 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -382,7 +382,8 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
static noinline bool
dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
- const struct dccp_hdr *dh)
+ const struct dccp_hdr *dh,
+ const struct nf_hook_state *hook_state)
{
struct net *net = nf_ct_net(ct);
struct nf_dccp_net *dn;
@@ -414,7 +415,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
out_invalid:
- nf_ct_l4proto_log_invalid(skb, ct, "%s", msg);
+ nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", msg);
return false;
}
@@ -464,8 +465,7 @@ static bool dccp_error(const struct dccp_hdr *dh,
}
return false;
out_invalid:
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_DCCP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg);
return true;
}
@@ -488,7 +488,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
return -NF_ACCEPT;
type = dh->dccph_type;
- if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh))
+ if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
return -NF_ACCEPT;
if (type == DCCP_PKT_RESET &&
@@ -543,11 +543,11 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
ct->proto.dccp.last_pkt = type;
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid packet");
return NF_ACCEPT;
case CT_DCCP_INVALID:
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid state transition");
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 4efd8741c105..b38b7164acd5 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -170,12 +170,12 @@ int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
if (state->pf == AF_INET) {
- nf_l4proto_log_invalid(skb, state->net, state->pf,
+ nf_l4proto_log_invalid(skb, state,
l4proto,
"outer daddr %pI4 != inner %pI4",
&outer_daddr->ip, &ct_daddr->ip);
} else if (state->pf == AF_INET6) {
- nf_l4proto_log_invalid(skb, state->net, state->pf,
+ nf_l4proto_log_invalid(skb, state,
l4proto,
"outer daddr %pI6 != inner %pI6",
&outer_daddr->ip6, &ct_daddr->ip6);
@@ -197,8 +197,7 @@ static void icmp_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_ICMP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_ICMP, "%s", msg);
}
/* Small and modified version of icmp_rcv */
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index facd8c64ec4e..61e3b05cf02c 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -126,8 +126,7 @@ static void icmpv6_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_ICMPV6, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_ICMPV6, "%s", msg);
}
int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index fb8dc02e502f..2394238d01c9 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -351,7 +351,7 @@ static bool sctp_error(struct sk_buff *skb,
}
return false;
out_invalid:
- nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_SCTP, "%s", logmsg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_SCTP, "%s", logmsg);
return true;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index de840fc41a2e..f7e8baf59b51 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -446,14 +446,15 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
}
}
-static bool tcp_in_window(const struct nf_conn *ct,
- struct ip_ct_tcp *state,
+static bool tcp_in_window(struct nf_conn *ct,
enum ip_conntrack_dir dir,
unsigned int index,
const struct sk_buff *skb,
unsigned int dataoff,
- const struct tcphdr *tcph)
+ const struct tcphdr *tcph,
+ const struct nf_hook_state *hook_state)
{
+ struct ip_ct_tcp *state = &ct->proto.tcp;
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = nf_tcp_pernet(net);
struct ip_ct_tcp_state *sender = &state->seen[dir];
@@ -670,7 +671,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
tn->tcp_be_liberal)
res = true;
if (!res) {
- nf_ct_l4proto_log_invalid(skb, ct,
+ nf_ct_l4proto_log_invalid(skb, ct, hook_state,
"%s",
before(seq, sender->td_maxend + 1) ?
in_recv_win ?
@@ -710,7 +711,7 @@ static void tcp_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_TCP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_TCP, "%s", msg);
}
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
@@ -970,7 +971,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
IP_CT_EXP_CHALLENGE_ACK;
}
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct,
+ nf_ct_l4proto_log_invalid(skb, ct, state,
"packet (index %d) in dir %d ignored, state %s",
index, dir,
tcp_conntrack_names[old_state]);
@@ -995,7 +996,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "invalid state");
return -NF_ACCEPT;
case TCP_CONNTRACK_TIME_WAIT:
/* RFC5961 compliance cause stack to send "challenge-ACK"
@@ -1010,7 +1011,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
/* Detected RFC5961 challenge ACK */
ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "challenge-ack ignored");
return NF_ACCEPT; /* Don't change state */
}
break;
@@ -1035,7 +1036,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
/* Invalid RST */
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst");
return -NF_ACCEPT;
}
@@ -1079,8 +1080,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
break;
}
- if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
- skb, dataoff, th)) {
+ if (!tcp_in_window(ct, dir, index,
+ skb, dataoff, th, state)) {
spin_unlock_bh(&ct->lock);
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 68911fcaa0f1..698fee49e732 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -38,8 +38,7 @@ static void udp_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_UDP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_UDP, "%s", msg);
}
static bool udp_error(struct sk_buff *skb,
@@ -130,8 +129,7 @@ static void udplite_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_UDPLITE, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_UDPLITE, "%s", msg);
}
static bool udplite_error(struct sk_buff *skb,
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 7780342e2f2d..866cfba04d6c 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -268,6 +268,7 @@ static struct nft_expr_type *nft_basic_types[] = {
&nft_meta_type,
&nft_rt_type,
&nft_exthdr_type,
+ &nft_last_type,
};
static struct nft_object_type *nft_basic_objects[] = {
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index 58fda6ac663b..50b4e3c9347a 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -126,8 +126,10 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
#ifdef CONFIG_KALLSYMS
ret = snprintf(sym, sizeof(sym), "%ps", ops->hook);
- if (ret < 0 || ret > (int)sizeof(sym))
+ if (ret >= sizeof(sym)) {
+ ret = -EINVAL;
goto nla_put_failure;
+ }
module_name = strstr(sym, " [");
if (module_name) {
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 7f705b5c09de..4f583d2e220e 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -164,7 +164,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
{
struct tcphdr *tcph;
- if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+ if (pkt->tprot != IPPROTO_TCP)
return NULL;
tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
@@ -312,6 +312,9 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
const struct sctp_chunkhdr *sch;
struct sctp_chunkhdr _sch;
+ if (pkt->tprot != IPPROTO_SCTP)
+ goto err;
+
do {
sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch);
if (!sch || !sch->length)
@@ -334,7 +337,7 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
}
offset += SCTP_PAD4(ntohs(sch->length));
} while (offset < pkt->skb->len);
-
+err:
if (priv->flags & NFT_EXTHDR_F_PRESENT)
nft_reg_store8(dest, false);
else
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
new file mode 100644
index 000000000000..913ac45167f2
--- /dev/null
+++ b/net/netfilter/nft_last.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_last_priv {
+ unsigned long last_jiffies;
+ unsigned int last_set;
+};
+
+static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = {
+ [NFTA_LAST_SET] = { .type = NLA_U32 },
+ [NFTA_LAST_MSECS] = { .type = NLA_U64 },
+};
+
+static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+ u64 last_jiffies;
+ int err;
+
+ if (tb[NFTA_LAST_MSECS]) {
+ err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies);
+ if (err < 0)
+ return err;
+
+ priv->last_jiffies = jiffies + (unsigned long)last_jiffies;
+ priv->last_set = 1;
+ }
+
+ return 0;
+}
+
+static void nft_last_eval(const struct nft_expr *expr,
+ struct nft_regs *regs, const struct nft_pktinfo *pkt)
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+
+ priv->last_jiffies = jiffies;
+ priv->last_set = 1;
+}
+
+static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+ __be64 msecs;
+
+ if (time_before(jiffies, priv->last_jiffies))
+ priv->last_set = 0;
+
+ if (priv->last_set)
+ msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies);
+ else
+ msecs = 0;
+
+ if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) ||
+ nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nft_expr_ops nft_last_ops = {
+ .type = &nft_last_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_last_priv)),
+ .eval = nft_last_eval,
+ .init = nft_last_init,
+ .dump = nft_last_dump,
+};
+
+struct nft_expr_type nft_last_type __read_mostly = {
+ .name = "last",
+ .ops = &nft_last_ops,
+ .policy = nft_last_policy,
+ .maxattr = NFTA_LAST_MAX,
+ .flags = NFT_EXPR_STATEFUL,
+ .owner = THIS_MODULE,
+};
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e9c0afc8becc..d9ac60ffe927 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -52,6 +52,8 @@ static void qdisc_maybe_clear_missed(struct Qdisc *q,
*/
if (!netif_xmit_frozen_or_stopped(txq))
set_bit(__QDISC_STATE_MISSED, &q->state);
+ else
+ set_bit(__QDISC_STATE_DRAINING, &q->state);
}
/* Main transmission queue. */
@@ -164,9 +166,13 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
skb = next;
}
- if (lock)
+
+ if (lock) {
spin_unlock(lock);
- __netif_schedule(q);
+ set_bit(__QDISC_STATE_MISSED, &q->state);
+ } else {
+ __netif_schedule(q);
+ }
}
static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -409,7 +415,11 @@ void __qdisc_run(struct Qdisc *q)
while (qdisc_restart(q, &packets)) {
quota -= packets;
if (quota <= 0) {
- __netif_schedule(q);
+ if (q->flags & TCQ_F_NOLOCK)
+ set_bit(__QDISC_STATE_MISSED, &q->state);
+ else
+ __netif_schedule(q);
+
break;
}
}
@@ -698,13 +708,14 @@ retry:
if (likely(skb)) {
qdisc_update_stats_at_dequeue(qdisc, skb);
} else if (need_retry &&
- test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+ READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) {
/* Delay clearing the STATE_MISSED here to reduce
* the overhead of the second spin_trylock() in
* qdisc_run_begin() and __netif_schedule() calling
* in qdisc_run_end().
*/
clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
/* Make sure dequeuing happens after clearing
* STATE_MISSED.
@@ -714,8 +725,6 @@ retry:
need_retry = false;
goto retry;
- } else {
- WRITE_ONCE(qdisc->empty, true);
}
return skb;
@@ -916,7 +925,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
- sch->empty = true;
dev_hold(dev);
refcount_set(&sch->refcnt, 1);
@@ -1222,6 +1230,7 @@ static void dev_reset_queue(struct net_device *dev,
spin_unlock_bh(qdisc_lock(qdisc));
if (nolock) {
clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
spin_unlock_bh(&qdisc->seqlock);
}
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d29b579da904..09a8f23ec709 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1275,7 +1275,10 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
return SCTP_DISPOSITION_DISCARD;
sctp_transport_pl_recv(link);
- return SCTP_DISPOSITION_CONSUME;
+ if (link->pl.state == SCTP_PL_COMPLETE)
+ return SCTP_DISPOSITION_CONSUME;
+
+ return sctp_sf_send_probe(net, ep, asoc, type, link, commands);
}
max_interval = link->hbinterval + link->rto;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index f27b856ea8ce..5f23804f21c7 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -213,15 +213,10 @@ void sctp_transport_reset_reconf_timer(struct sctp_transport *transport)
void sctp_transport_reset_probe_timer(struct sctp_transport *transport)
{
- int scale = 1;
-
if (timer_pending(&transport->probe_timer))
return;
- if (transport->pl.state == SCTP_PL_COMPLETE &&
- transport->pl.probe_count == 1)
- scale = 30; /* works as PMTU_RAISE_TIMER */
if (!mod_timer(&transport->probe_timer,
- jiffies + transport->probe_interval * scale))
+ jiffies + transport->probe_interval))
sctp_transport_hold(transport);
}
@@ -333,13 +328,15 @@ void sctp_transport_pl_recv(struct sctp_transport *t)
t->pl.probe_size += SCTP_PL_MIN_STEP;
if (t->pl.probe_size >= t->pl.probe_high) {
t->pl.probe_high = 0;
+ t->pl.raise_count = 0;
t->pl.state = SCTP_PL_COMPLETE; /* Search -> Search Complete */
t->pl.probe_size = t->pl.pmtu;
t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
sctp_assoc_sync_pmtu(t->asoc);
}
- } else if (t->pl.state == SCTP_PL_COMPLETE) {
+ } else if (t->pl.state == SCTP_PL_COMPLETE && ++t->pl.raise_count == 30) {
+ /* Raise probe_size again after 30 * interval in Search Complete */
t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
t->pl.probe_size += SCTP_PL_MIN_STEP;
}
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 075c4f4b41cf..289025cd545a 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -154,6 +154,9 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
goto out_err;
}
+ if (sk->sk_state == SMC_INIT)
+ return -ENOTCONN;
+
if (len > conn->sndbuf_desc->len)
SMC_STAT_RMB_TX_SIZE_SMALL(smc, !conn->lnk);
@@ -164,8 +167,6 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
SMC_STAT_INC(smc, urg_data_cnt);
while (msg_data_left(msg)) {
- if (sk->sk_state == SMC_INIT)
- return -ENOTCONN;
if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
(smc->sk.sk_err == ECONNABORTED) ||
conn->killed)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index e4cb0ff4dcf4..e321fc63a2e9 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -565,6 +565,42 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
return 0;
}
+/* For partial checksum offload, the outer header checksum is calculated
+ * by software and the inner header checksum is calculated by hardware.
+ * This requires hardware to know the inner packet type to calculate
+ * the inner header checksum. Save inner ip protocol here to avoid
+ * traversing the packet in the vendor's xmit code.
+ * If the encap type is IPIP, just save skb->inner_ipproto. Otherwise,
+ * get the ip protocol from the IP header.
+ */
+static void xfrm_get_inner_ipproto(struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ const struct ethhdr *eth;
+
+ if (!xo)
+ return;
+
+ if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) {
+ xo->inner_ipproto = skb->inner_ipproto;
+ return;
+ }
+
+ if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
+ return;
+
+ eth = (struct ethhdr *)skb_inner_mac_header(skb);
+
+ switch (ntohs(eth->h_proto)) {
+ case ETH_P_IPV6:
+ xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr;
+ break;
+ case ETH_P_IP:
+ xo->inner_ipproto = inner_ip_hdr(skb)->protocol;
+ break;
+ }
+}
+
int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -594,12 +630,15 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
return -ENOMEM;
}
- skb->encapsulation = 1;
sp->olen++;
sp->xvec[sp->len++] = x;
xfrm_state_hold(x);
+ if (skb->encapsulation)
+ xfrm_get_inner_ipproto(skb);
+ skb->encapsulation = 1;
+
if (skb_is_gso(skb)) {
if (skb->inner_protocol)
return xfrm_output_gso(net, sk, skb);