aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorBenjamin Tissoires <benjamin.tissoires@redhat.com>2019-08-23 11:35:39 +0200
committerBenjamin Tissoires <benjamin.tissoires@redhat.com>2019-08-23 11:35:39 +0200
commit6f50fa2a6f1395ad5f59ce7b87730f1f3ea19d76 (patch)
treef826145c5bf9c502c033e7b67bcefc30eb0f9eee /net
parentHID: logitech-dj: extend consumer usages range (diff)
parentMerge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid (diff)
downloadlinux-dev-6f50fa2a6f1395ad5f59ce7b87730f1f3ea19d76.tar.xz
linux-dev-6f50fa2a6f1395ad5f59ce7b87730f1f3ea19d76.zip
Merge branch 'master' into for-5.4/logitech
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Diffstat (limited to 'net')
-rw-r--r--net/9p/trans_virtio.c8
-rw-r--r--net/9p/trans_xen.c8
-rw-r--r--net/batman-adv/multicast.c8
-rw-r--r--net/bluetooth/hci_core.c1
-rw-r--r--net/bluetooth/hci_debugfs.c31
-rw-r--r--net/bluetooth/hidp/core.c9
-rw-r--r--net/bluetooth/l2cap_core.c2
-rw-r--r--net/bpfilter/Kconfig2
-rw-r--r--net/bridge/br.c5
-rw-r--r--net/bridge/br_multicast.c3
-rw-r--r--net/bridge/br_private.h9
-rw-r--r--net/bridge/br_vlan.c29
-rw-r--r--net/bridge/netfilter/Kconfig6
-rw-r--r--net/bridge/netfilter/ebtables.c32
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c10
-rw-r--r--net/can/gw.c48
-rw-r--r--net/ceph/Makefile2
-rw-r--r--net/ceph/ceph_common.c5
-rw-r--r--net/ceph/cls_lock_client.c54
-rw-r--r--net/ceph/debugfs.c33
-rw-r--r--net/ceph/decode.c84
-rw-r--r--net/ceph/messenger.c14
-rw-r--r--net/ceph/mon_client.c21
-rw-r--r--net/ceph/osd_client.c42
-rw-r--r--net/ceph/osdmap.c31
-rw-r--r--net/ceph/pagevec.c33
-rw-r--r--net/ceph/striper.c17
-rw-r--r--net/compat.c3
-rw-r--r--net/core/dev.c17
-rw-r--r--net/core/filter.c32
-rw-r--r--net/core/flow_offload.c22
-rw-r--r--net/core/neighbour.c22
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/core/skmsg.c4
-rw-r--r--net/core/sock.c21
-rw-r--r--net/core/sock_diag.c3
-rw-r--r--net/core/sock_map.c19
-rw-r--r--net/core/sysctl_net_core.c34
-rw-r--r--net/dccp/sysctl.c16
-rw-r--r--net/dsa/slave.c6
-rw-r--r--net/dsa/switch.c3
-rw-r--r--net/dsa/tag_sja1105.c11
-rw-r--r--net/hsr/hsr_device.c18
-rw-r--r--net/hsr/hsr_device.h1
-rw-r--r--net/hsr/hsr_netlink.c7
-rw-r--r--net/ieee802154/6lowpan/reassembly.c2
-rw-r--r--net/ipv4/fib_frontend.c5
-rw-r--r--net/ipv4/inet_fragment.c41
-rw-r--r--net/ipv4/ip_fragment.c8
-rw-r--r--net/ipv4/ipip.c3
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c4
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c2
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c12
-rw-r--r--net/ipv4/sysctl_net_ipv4.c60
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_bpf.c6
-rw-r--r--net/ipv4/tcp_cong.c6
-rw-r--r--net/ipv4/tcp_output.c16
-rw-r--r--net/ipv4/tcp_ulp.c13
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv6/addrconf.c6
-rw-r--r--net/ipv6/ah6.c4
-rw-r--r--net/ipv6/datagram.c2
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/ip6_fib.c18
-rw-r--r--net/ipv6/ip6_flowlabel.c9
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/ip6_tunnel.c8
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c2
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c2
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c2
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c8
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c4
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c2
-rw-r--r--net/ipv6/reassembly.c2
-rw-r--r--net/ipv6/route.c11
-rw-r--r--net/ipv6/sit.c13
-rw-r--r--net/ipv6/sysctl_net_ipv6.c10
-rw-r--r--net/ipv6/tcp_ipv6.c7
-rw-r--r--net/iucv/af_iucv.c14
-rw-r--r--net/l2tp/l2tp_ppp.c3
-rw-r--r--net/mac80211/cfg.c8
-rw-r--r--net/mac80211/driver-ops.c13
-rw-r--r--net/mac80211/iface.c1
-rw-r--r--net/mac80211/mlme.c10
-rw-r--r--net/mac80211/util.c7
-rw-r--r--net/mpls/af_mpls.c10
-rw-r--r--net/netfilter/Kconfig6
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmac.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c2
-rw-r--r--net/netfilter/nf_conntrack_amanda.c2
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c20
-rw-r--r--net/netfilter/nf_conntrack_expect.c26
-rw-r--r--net/netfilter/nf_conntrack_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c5
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c18
-rw-r--r--net/netfilter/nf_conntrack_irc.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c4
-rw-r--r--net/netfilter/nf_conntrack_pptp.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_icmp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c8
-rw-r--r--net/netfilter/nf_conntrack_sane.c2
-rw-r--r--net/netfilter/nf_conntrack_sip.c10
-rw-r--r--net/netfilter/nf_conntrack_tftp.c2
-rw-r--r--net/netfilter/nf_flow_table_core.c43
-rw-r--r--net/netfilter/nf_flow_table_ip.c43
-rw-r--r--net/netfilter/nf_nat_amanda.c2
-rw-r--r--net/netfilter/nf_nat_core.c2
-rw-r--r--net/netfilter/nf_nat_ftp.c2
-rw-r--r--net/netfilter/nf_nat_irc.c2
-rw-r--r--net/netfilter/nf_nat_sip.c8
-rw-r--r--net/netfilter/nf_nat_tftp.c2
-rw-r--r--net/netfilter/nf_synproxy_core.c8
-rw-r--r--net/netfilter/nf_tables_api.c23
-rw-r--r--net/netfilter/nf_tables_offload.c22
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nft_chain_filter.c2
-rw-r--r--net/netfilter/nft_chain_nat.c3
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/nft_flow_offload.c9
-rw-r--r--net/netfilter/nft_hash.c2
-rw-r--r--net/netfilter/nft_meta.c18
-rw-r--r--net/netfilter/nft_redir.c2
-rw-r--r--net/netfilter/nft_synproxy.c2
-rw-r--r--net/netrom/af_netrom.c1
-rw-r--r--net/openvswitch/datapath.c54
-rw-r--r--net/openvswitch/flow.c8
-rw-r--r--net/openvswitch/flow.h4
-rw-r--r--net/openvswitch/flow_table.c8
-rw-r--r--net/packet/af_packet.c7
-rw-r--r--net/rds/connection.c1
-rw-r--r--net/rds/ib.h5
-rw-r--r--net/rds/ib_cm.c26
-rw-r--r--net/rds/ib_frmr.c95
-rw-r--r--net/rds/ib_mr.h4
-rw-r--r--net/rds/ib_rdma.c60
-rw-r--r--net/rds/ib_send.c29
-rw-r--r--net/rds/rdma.c10
-rw-r--r--net/rds/rdma_transport.c16
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rds/send.c4
-rw-r--r--net/rxrpc/af_rxrpc.c6
-rw-r--r--net/rxrpc/ar-internal.h9
-rw-r--r--net/rxrpc/call_event.c15
-rw-r--r--net/rxrpc/input.c59
-rw-r--r--net/rxrpc/local_object.c103
-rw-r--r--net/rxrpc/output.c3
-rw-r--r--net/rxrpc/peer_event.c2
-rw-r--r--net/rxrpc/peer_object.c18
-rw-r--r--net/rxrpc/recvmsg.c6
-rw-r--r--net/rxrpc/sendmsg.c1
-rw-r--r--net/rxrpc/sysctl.c9
-rw-r--r--net/sched/Kconfig2
-rw-r--r--net/sched/act_bpf.c9
-rw-r--r--net/sched/act_connmark.c9
-rw-r--r--net/sched/act_csum.c9
-rw-r--r--net/sched/act_ct.c9
-rw-r--r--net/sched/act_ctinfo.c9
-rw-r--r--net/sched/act_gact.c8
-rw-r--r--net/sched/act_ife.c13
-rw-r--r--net/sched/act_mirred.c13
-rw-r--r--net/sched/act_mpls.c8
-rw-r--r--net/sched/act_nat.c9
-rw-r--r--net/sched/act_pedit.c10
-rw-r--r--net/sched/act_police.c8
-rw-r--r--net/sched/act_sample.c10
-rw-r--r--net/sched/act_simple.c10
-rw-r--r--net/sched/act_skbedit.c23
-rw-r--r--net/sched/act_skbmod.c11
-rw-r--r--net/sched/act_tunnel_key.c8
-rw-r--r--net/sched/act_vlan.c25
-rw-r--r--net/sched/cls_api.c17
-rw-r--r--net/sched/cls_bpf.c2
-rw-r--r--net/sched/cls_flower.c2
-rw-r--r--net/sched/cls_matchall.c2
-rw-r--r--net/sched/cls_u32.c6
-rw-r--r--net/sched/sch_codel.c6
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_sfq.c2
-rw-r--r--net/sched/sch_taprio.c9
-rw-r--r--net/sctp/sm_make_chunk.c12
-rw-r--r--net/sctp/sm_sideeffect.c2
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/sctp/stream.c1
-rw-r--r--net/sctp/sysctl.c35
-rw-r--r--net/smc/af_smc.c15
-rw-r--r--net/socket.c34
-rw-r--r--net/sunrpc/Kconfig2
-rw-r--r--net/sunrpc/backchannel_rqst.c40
-rw-r--r--net/sunrpc/clnt.c95
-rw-r--r--net/sunrpc/debugfs.c116
-rw-r--r--net/sunrpc/rpc_pipe.c34
-rw-r--r--net/sunrpc/sched.c81
-rw-r--r--net/sunrpc/stats.c23
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/xprt.c101
-rw-r--r--net/sunrpc/xprtmultipath.c89
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c7
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c327
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c152
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c8
-rw-r--r--net/sunrpc/xprtrdma/transport.c87
-rw-r--r--net/sunrpc/xprtrdma/verbs.c115
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h45
-rw-r--r--net/sunrpc/xprtsock.c126
-rw-r--r--net/tipc/addr.c1
-rw-r--r--net/tipc/link.c92
-rw-r--r--net/tipc/msg.h8
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/netlink_compat.c11
-rw-r--r--net/tipc/node.c1
-rw-r--r--net/tipc/socket.c3
-rw-r--r--net/tipc/sysctl.c6
-rw-r--r--net/tipc/topsrv.c2
-rw-r--r--net/tls/tls_device.c9
-rw-r--r--net/tls/tls_main.c99
-rw-r--r--net/tls/tls_sw.c83
-rw-r--r--net/vmw_vsock/hyperv_transport.c8
-rw-r--r--net/wireless/Kconfig2
-rw-r--r--net/wireless/core.c6
-rw-r--r--net/wireless/nl80211.c4
-rw-r--r--net/wireless/util.c27
-rw-r--r--net/xdp/xdp_umem.c16
-rw-r--r--net/xdp/xsk.c13
231 files changed, 2626 insertions, 1627 deletions
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 549938af02e7..a3cd90a74012 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -767,10 +767,16 @@ static struct p9_trans_module p9_virtio_trans = {
/* The standard init function */
static int __init p9_virtio_init(void)
{
+ int rc;
+
INIT_LIST_HEAD(&virtio_chan_list);
v9fs_register_trans(&p9_virtio_trans);
- return register_virtio_driver(&p9_virtio_drv);
+ rc = register_virtio_driver(&p9_virtio_drv);
+ if (rc)
+ v9fs_unregister_trans(&p9_virtio_trans);
+
+ return rc;
}
static void __exit p9_virtio_cleanup(void)
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 29420ebb8f07..3963eb11c3fb 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -530,13 +530,19 @@ static struct xenbus_driver xen_9pfs_front_driver = {
static int p9_trans_xen_init(void)
{
+ int rc;
+
if (!xen_domain())
return -ENODEV;
pr_info("Initialising Xen transport for 9pfs\n");
v9fs_register_trans(&p9_xen_trans);
- return xenbus_register_frontend(&xen_9pfs_front_driver);
+ rc = xenbus_register_frontend(&xen_9pfs_front_driver);
+ if (rc)
+ v9fs_unregister_trans(&p9_xen_trans);
+
+ return rc;
}
module_init(p9_trans_xen_init);
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 67d7f83009ae..1d5bdf3a4b65 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -2303,7 +2303,7 @@ __batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid,
while (bucket_tmp < hash->size) {
if (batadv_mcast_flags_dump_bucket(msg, portid, cb, hash,
- *bucket, &idx_tmp))
+ bucket_tmp, &idx_tmp))
break;
bucket_tmp++;
@@ -2420,8 +2420,10 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig)
batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS);
batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS);
batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS);
- batadv_mcast_want_rtr4_update(bat_priv, orig, BATADV_NO_FLAGS);
- batadv_mcast_want_rtr6_update(bat_priv, orig, BATADV_NO_FLAGS);
+ batadv_mcast_want_rtr4_update(bat_priv, orig,
+ BATADV_MCAST_WANT_NO_RTR4);
+ batadv_mcast_want_rtr6_update(bat_priv, orig,
+ BATADV_MCAST_WANT_NO_RTR6);
spin_unlock_bh(&orig->mcast_handler_lock);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index b9585e7d9d2e..04bc79359a17 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3202,6 +3202,7 @@ struct hci_dev *hci_alloc_dev(void)
hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE;
hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE;
hdev->auth_payload_timeout = DEFAULT_AUTH_PAYLOAD_TIMEOUT;
+ hdev->min_enc_key_size = HCI_MIN_ENC_KEY_SIZE;
mutex_init(&hdev->lock);
mutex_init(&hdev->req_lock);
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index bb67f4a5479a..402e2cc54044 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -433,6 +433,35 @@ static int auto_accept_delay_set(void *data, u64 val)
return 0;
}
+static int min_encrypt_key_size_set(void *data, u64 val)
+{
+ struct hci_dev *hdev = data;
+
+ if (val < 1 || val > 16)
+ return -EINVAL;
+
+ hci_dev_lock(hdev);
+ hdev->min_enc_key_size = val;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+static int min_encrypt_key_size_get(void *data, u64 *val)
+{
+ struct hci_dev *hdev = data;
+
+ hci_dev_lock(hdev);
+ *val = hdev->min_enc_key_size;
+ hci_dev_unlock(hdev);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(min_encrypt_key_size_fops,
+ min_encrypt_key_size_get,
+ min_encrypt_key_size_set, "%llu\n");
+
static int auto_accept_delay_get(void *data, u64 *val)
{
struct hci_dev *hdev = data;
@@ -545,6 +574,8 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev)
if (lmp_ssp_capable(hdev)) {
debugfs_create_file("ssp_debug_mode", 0444, hdev->debugfs,
hdev, &ssp_debug_mode_fops);
+ debugfs_create_file("min_encrypt_key_size", 0644, hdev->debugfs,
+ hdev, &min_encrypt_key_size_fops);
debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs,
hdev, &auto_accept_delay_fops);
}
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 5abd423b55fa..8d889969ae7e 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -101,6 +101,7 @@ static int hidp_send_message(struct hidp_session *session, struct socket *sock,
{
struct sk_buff *skb;
struct sock *sk = sock->sk;
+ int ret;
BT_DBG("session %p data %p size %d", session, data, size);
@@ -114,13 +115,17 @@ static int hidp_send_message(struct hidp_session *session, struct socket *sock,
}
skb_put_u8(skb, hdr);
- if (data && size > 0)
+ if (data && size > 0) {
skb_put_data(skb, data, size);
+ ret = size;
+ } else {
+ ret = 0;
+ }
skb_queue_tail(transmit, skb);
wake_up_interruptible(sk_sleep(sk));
- return 0;
+ return ret;
}
static int hidp_send_ctrl_message(struct hidp_session *session,
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index cc506fe99b4d..dfc1edb168b7 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1361,7 +1361,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
* actually encrypted before enforcing a key size.
*/
return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) ||
- hcon->enc_key_size >= HCI_MIN_ENC_KEY_SIZE);
+ hcon->enc_key_size >= hcon->hdev->min_enc_key_size);
}
static void l2cap_do_start(struct l2cap_chan *chan)
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
index 91f9d878165e..fed9290e3b41 100644
--- a/net/bpfilter/Kconfig
+++ b/net/bpfilter/Kconfig
@@ -9,7 +9,7 @@ menuconfig BPFILTER
if BPFILTER
config BPFILTER_UMH
tristate "bpfilter kernel module with user mode helper"
- depends on $(success,$(srctree)/scripts/cc-can-link.sh $(CC))
+ depends on CC_CAN_LINK
default m
help
This builds bpfilter kernel module with embedded user mode helper
diff --git a/net/bridge/br.c b/net/bridge/br.c
index d164f63a4345..8a8f9e5f264f 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -37,12 +37,15 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
int err;
if (dev->priv_flags & IFF_EBRIDGE) {
+ err = br_vlan_bridge_event(dev, event, ptr);
+ if (err)
+ return notifier_from_errno(err);
+
if (event == NETDEV_REGISTER) {
/* register of bridge completed, add sysfs entries */
br_sysfs_addbr(dev);
return NOTIFY_DONE;
}
- br_vlan_bridge_event(dev, event, ptr);
}
/* not a port of a bridge */
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 3d8deac2353d..f8cac3702712 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1388,6 +1388,9 @@ br_multicast_leave_group(struct net_bridge *br,
if (!br_port_group_equal(p, port, src))
continue;
+ if (p->flags & MDB_PG_FLAGS_PERMANENT)
+ break;
+
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index e8cf03b43b7d..646504db0220 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -894,8 +894,8 @@ int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask);
void br_vlan_get_stats(const struct net_bridge_vlan *v,
struct br_vlan_stats *stats);
void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
-void br_vlan_bridge_event(struct net_device *dev, unsigned long event,
- void *ptr);
+int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
+ void *ptr);
static inline struct net_bridge_vlan_group *br_vlan_group(
const struct net_bridge *br)
@@ -1085,9 +1085,10 @@ static inline void br_vlan_port_event(struct net_bridge_port *p,
{
}
-static inline void br_vlan_bridge_event(struct net_device *dev,
- unsigned long event, void *ptr)
+static inline int br_vlan_bridge_event(struct net_device *dev,
+ unsigned long event, void *ptr)
{
+ return 0;
}
#endif
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 021cc9f66804..f5b2aeebbfe9 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1053,7 +1053,6 @@ int br_vlan_init(struct net_bridge *br)
{
struct net_bridge_vlan_group *vg;
int ret = -ENOMEM;
- bool changed;
vg = kzalloc(sizeof(*vg), GFP_KERNEL);
if (!vg)
@@ -1068,17 +1067,10 @@ int br_vlan_init(struct net_bridge *br)
br->vlan_proto = htons(ETH_P_8021Q);
br->default_pvid = 1;
rcu_assign_pointer(br->vlgrp, vg);
- ret = br_vlan_add(br, 1,
- BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL);
- if (ret)
- goto err_vlan_add;
out:
return ret;
-err_vlan_add:
- vlan_tunnel_deinit(vg);
err_tunnel_init:
rhashtable_destroy(&vg->vlan_hash);
err_rhtbl:
@@ -1464,13 +1456,23 @@ static void nbp_vlan_set_vlan_dev_state(struct net_bridge_port *p, u16 vid)
}
/* Must be protected by RTNL. */
-void br_vlan_bridge_event(struct net_device *dev, unsigned long event,
- void *ptr)
+int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr)
{
struct netdev_notifier_changeupper_info *info;
- struct net_bridge *br;
+ struct net_bridge *br = netdev_priv(dev);
+ bool changed;
+ int ret = 0;
switch (event) {
+ case NETDEV_REGISTER:
+ ret = br_vlan_add(br, br->default_pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED |
+ BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL);
+ break;
+ case NETDEV_UNREGISTER:
+ br_vlan_delete(br, br->default_pvid);
+ break;
case NETDEV_CHANGEUPPER:
info = ptr;
br_vlan_upper_change(dev, info->upper_dev, info->linking);
@@ -1478,12 +1480,13 @@ void br_vlan_bridge_event(struct net_device *dev, unsigned long event,
case NETDEV_CHANGE:
case NETDEV_UP:
- br = netdev_priv(dev);
if (!br_opt_get(br, BROPT_VLAN_BRIDGE_BINDING))
- return;
+ break;
br_vlan_link_state_change(dev, br);
break;
}
+
+ return ret;
}
/* Must be protected by RTNL. */
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 154fa558bb90..5040fe43f4b4 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -6,7 +6,7 @@
menuconfig NF_TABLES_BRIDGE
depends on BRIDGE && NETFILTER && NF_TABLES
select NETFILTER_FAMILY_BRIDGE
- bool "Ethernet Bridge nf_tables support"
+ tristate "Ethernet Bridge nf_tables support"
if NF_TABLES_BRIDGE
@@ -25,6 +25,8 @@ config NF_LOG_BRIDGE
tristate "Bridge packet logging"
select NF_LOG_COMMON
+endif # NF_TABLES_BRIDGE
+
config NF_CONNTRACK_BRIDGE
tristate "IPv4/IPV6 bridge connection tracking support"
depends on NF_CONNTRACK
@@ -39,8 +41,6 @@ config NF_CONNTRACK_BRIDGE
To compile it as a module, choose M here. If unsure, say N.
-endif # NF_TABLES_BRIDGE
-
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 963dfdc14827..c8177a89f52c 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1770,20 +1770,28 @@ static int compat_calc_entry(const struct ebt_entry *e,
return 0;
}
+static int ebt_compat_init_offsets(unsigned int number)
+{
+ if (number > INT_MAX)
+ return -EINVAL;
+
+ /* also count the base chain policies */
+ number += NF_BR_NUMHOOKS;
+
+ return xt_compat_init_offsets(NFPROTO_BRIDGE, number);
+}
static int compat_table_info(const struct ebt_table_info *info,
struct compat_ebt_replace *newinfo)
{
unsigned int size = info->entries_size;
const void *entries = info->entries;
+ int ret;
newinfo->entries_size = size;
- if (info->nentries) {
- int ret = xt_compat_init_offsets(NFPROTO_BRIDGE,
- info->nentries);
- if (ret)
- return ret;
- }
+ ret = ebt_compat_init_offsets(info->nentries);
+ if (ret)
+ return ret;
return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
entries, newinfo);
@@ -2234,11 +2242,9 @@ static int compat_do_replace(struct net *net, void __user *user,
xt_compat_lock(NFPROTO_BRIDGE);
- if (tmp.nentries) {
- ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
- if (ret < 0)
- goto out_unlock;
- }
+ ret = ebt_compat_init_offsets(tmp.nentries);
+ if (ret < 0)
+ goto out_unlock;
ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
if (ret < 0)
@@ -2261,8 +2267,10 @@ static int compat_do_replace(struct net *net, void __user *user,
state.buf_kern_len = size64;
ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
- if (WARN_ON(ret < 0))
+ if (WARN_ON(ret < 0)) {
+ vfree(entries_tmp);
goto out_unlock;
+ }
vfree(entries_tmp);
tmp.entries_size = size64;
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index bed66f536b34..1804e867f715 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -30,13 +30,9 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
switch (priv->key) {
case NFT_META_BRI_IIFNAME:
br_dev = nft_meta_get_bridge(in);
- if (!br_dev)
- goto err;
break;
case NFT_META_BRI_OIFNAME:
br_dev = nft_meta_get_bridge(out);
- if (!br_dev)
- goto err;
break;
case NFT_META_BRI_IIFPVID: {
u16 p_pvid;
@@ -61,13 +57,11 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
return;
}
default:
- goto out;
+ return nft_meta_get_eval(expr, regs, pkt);
}
- strncpy((char *)dest, br_dev->name, IFNAMSIZ);
+ strncpy((char *)dest, br_dev ? br_dev->name : "", IFNAMSIZ);
return;
-out:
- return nft_meta_get_eval(expr, regs, pkt);
err:
regs->verdict.code = NFT_BREAK;
}
diff --git a/net/can/gw.c b/net/can/gw.c
index 5275ddf580bc..72711053ebe6 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1046,32 +1046,50 @@ static __init int cgw_module_init(void)
pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n",
max_hops);
- register_pernet_subsys(&cangw_pernet_ops);
+ ret = register_pernet_subsys(&cangw_pernet_ops);
+ if (ret)
+ return ret;
+
+ ret = -ENOMEM;
cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job),
0, 0, NULL);
-
if (!cgw_cache)
- return -ENOMEM;
+ goto out_cache_create;
/* set notifier */
notifier.notifier_call = cgw_notifier;
- register_netdevice_notifier(&notifier);
+ ret = register_netdevice_notifier(&notifier);
+ if (ret)
+ goto out_register_notifier;
ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE,
NULL, cgw_dump_jobs, 0);
- if (ret) {
- unregister_netdevice_notifier(&notifier);
- kmem_cache_destroy(cgw_cache);
- return -ENOBUFS;
- }
-
- /* Only the first call to rtnl_register_module can fail */
- rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
- cgw_create_job, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
- cgw_remove_job, NULL, 0);
+ if (ret)
+ goto out_rtnl_register1;
+
+ ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
+ cgw_create_job, NULL, 0);
+ if (ret)
+ goto out_rtnl_register2;
+ ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
+ cgw_remove_job, NULL, 0);
+ if (ret)
+ goto out_rtnl_register3;
return 0;
+
+out_rtnl_register3:
+ rtnl_unregister(PF_CAN, RTM_NEWROUTE);
+out_rtnl_register2:
+ rtnl_unregister(PF_CAN, RTM_GETROUTE);
+out_rtnl_register1:
+ unregister_netdevice_notifier(&notifier);
+out_register_notifier:
+ kmem_cache_destroy(cgw_cache);
+out_cache_create:
+ unregister_pernet_subsys(&cangw_pernet_ops);
+
+ return ret;
}
static __exit void cgw_module_exit(void)
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index db09defe27d0..59d0ba2072de 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_CEPH_LIB) += libceph.o
libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
- mon_client.o \
+ mon_client.o decode.o \
cls_lock_client.o \
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
striper.o \
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 1c811c74bfc0..4eeea4d5c3ef 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -776,9 +776,7 @@ static int __init init_ceph_lib(void)
{
int ret = 0;
- ret = ceph_debugfs_init();
- if (ret < 0)
- goto out;
+ ceph_debugfs_init();
ret = ceph_crypto_init();
if (ret < 0)
@@ -803,7 +801,6 @@ out_crypto:
ceph_crypto_shutdown();
out_debugfs:
ceph_debugfs_cleanup();
-out:
return ret;
}
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 4cc28541281b..17447c19d937 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -6,6 +6,7 @@
#include <linux/ceph/cls_lock_client.h>
#include <linux/ceph/decode.h>
+#include <linux/ceph/libceph.h>
/**
* ceph_cls_lock - grab rados lock for object
@@ -264,8 +265,11 @@ static int decode_locker(void **p, void *end, struct ceph_locker *locker)
return ret;
*p += sizeof(struct ceph_timespec); /* skip expiration */
- ceph_decode_copy(p, &locker->info.addr, sizeof(locker->info.addr));
- ceph_decode_addr(&locker->info.addr);
+
+ ret = ceph_decode_entity_addr(p, end, &locker->info.addr);
+ if (ret)
+ return ret;
+
len = ceph_decode_32(p);
*p += len; /* skip description */
@@ -360,7 +364,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
dout("%s lock_name %s\n", __func__, lock_name);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "get_info",
CEPH_OSD_FLAG_READ, get_info_op_page,
- get_info_op_buf_size, reply_page, &reply_len);
+ get_info_op_buf_size, &reply_page, &reply_len);
dout("%s: status %d\n", __func__, ret);
if (ret >= 0) {
@@ -375,3 +379,47 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
return ret;
}
EXPORT_SYMBOL(ceph_cls_lock_info);
+
+int ceph_cls_assert_locked(struct ceph_osd_request *req, int which,
+ char *lock_name, u8 type, char *cookie, char *tag)
+{
+ int assert_op_buf_size;
+ int name_len = strlen(lock_name);
+ int cookie_len = strlen(cookie);
+ int tag_len = strlen(tag);
+ struct page **pages;
+ void *p, *end;
+ int ret;
+
+ assert_op_buf_size = name_len + sizeof(__le32) +
+ cookie_len + sizeof(__le32) +
+ tag_len + sizeof(__le32) +
+ sizeof(u8) + CEPH_ENCODING_START_BLK_LEN;
+ if (assert_op_buf_size > PAGE_SIZE)
+ return -E2BIG;
+
+ ret = osd_req_op_cls_init(req, which, "lock", "assert_locked");
+ if (ret)
+ return ret;
+
+ pages = ceph_alloc_page_vector(1, GFP_NOIO);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ p = page_address(pages[0]);
+ end = p + assert_op_buf_size;
+
+ /* encode cls_lock_assert_op struct */
+ ceph_start_encoding(&p, 1, 1,
+ assert_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_string(&p, end, lock_name, name_len);
+ ceph_encode_8(&p, type);
+ ceph_encode_string(&p, end, cookie, cookie_len);
+ ceph_encode_string(&p, end, tag, tag_len);
+ WARN_ON(p != end);
+
+ osd_req_op_cls_request_data_pages(req, which, pages, assert_op_buf_size,
+ 0, false, true);
+ return 0;
+}
+EXPORT_SYMBOL(ceph_cls_assert_locked);
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 63aef9915f75..7cb992e55475 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -389,12 +389,9 @@ CEPH_DEFINE_SHOW_FUNC(monc_show)
CEPH_DEFINE_SHOW_FUNC(osdc_show)
CEPH_DEFINE_SHOW_FUNC(client_options_show)
-int __init ceph_debugfs_init(void)
+void __init ceph_debugfs_init(void)
{
ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
- if (!ceph_debugfs_dir)
- return -ENOMEM;
- return 0;
}
void ceph_debugfs_cleanup(void)
@@ -402,9 +399,8 @@ void ceph_debugfs_cleanup(void)
debugfs_remove(ceph_debugfs_dir);
}
-int ceph_debugfs_client_init(struct ceph_client *client)
+void ceph_debugfs_client_init(struct ceph_client *client)
{
- int ret = -ENOMEM;
char name[80];
snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
@@ -412,56 +408,37 @@ int ceph_debugfs_client_init(struct ceph_client *client)
dout("ceph_debugfs_client_init %p %s\n", client, name);
- BUG_ON(client->debugfs_dir);
client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
- if (!client->debugfs_dir)
- goto out;
client->monc.debugfs_file = debugfs_create_file("monc",
0400,
client->debugfs_dir,
client,
&monc_show_fops);
- if (!client->monc.debugfs_file)
- goto out;
client->osdc.debugfs_file = debugfs_create_file("osdc",
0400,
client->debugfs_dir,
client,
&osdc_show_fops);
- if (!client->osdc.debugfs_file)
- goto out;
client->debugfs_monmap = debugfs_create_file("monmap",
0400,
client->debugfs_dir,
client,
&monmap_show_fops);
- if (!client->debugfs_monmap)
- goto out;
client->debugfs_osdmap = debugfs_create_file("osdmap",
0400,
client->debugfs_dir,
client,
&osdmap_show_fops);
- if (!client->debugfs_osdmap)
- goto out;
client->debugfs_options = debugfs_create_file("client_options",
0400,
client->debugfs_dir,
client,
&client_options_show_fops);
- if (!client->debugfs_options)
- goto out;
-
- return 0;
-
-out:
- ceph_debugfs_client_cleanup(client);
- return ret;
}
void ceph_debugfs_client_cleanup(struct ceph_client *client)
@@ -477,18 +454,16 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
#else /* CONFIG_DEBUG_FS */
-int __init ceph_debugfs_init(void)
+void __init ceph_debugfs_init(void)
{
- return 0;
}
void ceph_debugfs_cleanup(void)
{
}
-int ceph_debugfs_client_init(struct ceph_client *client)
+void ceph_debugfs_client_init(struct ceph_client *client)
{
- return 0;
}
void ceph_debugfs_client_cleanup(struct ceph_client *client)
diff --git a/net/ceph/decode.c b/net/ceph/decode.c
new file mode 100644
index 000000000000..eea529595a7a
--- /dev/null
+++ b/net/ceph/decode.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ceph/decode.h>
+
+static int
+ceph_decode_entity_addr_versioned(void **p, void *end,
+ struct ceph_entity_addr *addr)
+{
+ int ret;
+ u8 struct_v;
+ u32 struct_len, addr_len;
+ void *struct_end;
+
+ ret = ceph_start_decoding(p, end, 1, "entity_addr_t", &struct_v,
+ &struct_len);
+ if (ret)
+ goto bad;
+
+ ret = -EINVAL;
+ struct_end = *p + struct_len;
+
+ ceph_decode_copy_safe(p, end, &addr->type, sizeof(addr->type), bad);
+
+ ceph_decode_copy_safe(p, end, &addr->nonce, sizeof(addr->nonce), bad);
+
+ ceph_decode_32_safe(p, end, addr_len, bad);
+ if (addr_len > sizeof(addr->in_addr))
+ goto bad;
+
+ memset(&addr->in_addr, 0, sizeof(addr->in_addr));
+ if (addr_len) {
+ ceph_decode_copy_safe(p, end, &addr->in_addr, addr_len, bad);
+
+ addr->in_addr.ss_family =
+ le16_to_cpu((__force __le16)addr->in_addr.ss_family);
+ }
+
+ /* Advance past anything the client doesn't yet understand */
+ *p = struct_end;
+ ret = 0;
+bad:
+ return ret;
+}
+
+static int
+ceph_decode_entity_addr_legacy(void **p, void *end,
+ struct ceph_entity_addr *addr)
+{
+ int ret = -EINVAL;
+
+ /* Skip rest of type field */
+ ceph_decode_skip_n(p, end, 3, bad);
+
+ /*
+ * Clients that don't support ADDR2 always send TYPE_NONE, change it
+ * to TYPE_LEGACY for forward compatibility.
+ */
+ addr->type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
+ ceph_decode_copy_safe(p, end, &addr->nonce, sizeof(addr->nonce), bad);
+ memset(&addr->in_addr, 0, sizeof(addr->in_addr));
+ ceph_decode_copy_safe(p, end, &addr->in_addr,
+ sizeof(addr->in_addr), bad);
+ addr->in_addr.ss_family =
+ be16_to_cpu((__force __be16)addr->in_addr.ss_family);
+ ret = 0;
+bad:
+ return ret;
+}
+
+int
+ceph_decode_entity_addr(void **p, void *end, struct ceph_entity_addr *addr)
+{
+ u8 marker;
+
+ ceph_decode_8_safe(p, end, marker, bad);
+ if (marker == 1)
+ return ceph_decode_entity_addr_versioned(p, end, addr);
+ else if (marker == 0)
+ return ceph_decode_entity_addr_legacy(p, end, addr);
+bad:
+ return -EINVAL;
+}
+EXPORT_SYMBOL(ceph_decode_entity_addr);
+
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index a33402c99321..962f521c863e 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -199,12 +199,14 @@ const char *ceph_pr_addr(const struct ceph_entity_addr *addr)
switch (ss.ss_family) {
case AF_INET:
- snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr,
+ snprintf(s, MAX_ADDR_STR_LEN, "(%d)%pI4:%hu",
+ le32_to_cpu(addr->type), &in4->sin_addr,
ntohs(in4->sin_port));
break;
case AF_INET6:
- snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr,
+ snprintf(s, MAX_ADDR_STR_LEN, "(%d)[%pI6c]:%hu",
+ le32_to_cpu(addr->type), &in6->sin6_addr,
ntohs(in6->sin6_port));
break;
@@ -220,7 +222,7 @@ EXPORT_SYMBOL(ceph_pr_addr);
static void encode_my_addr(struct ceph_messenger *msgr)
{
memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr));
- ceph_encode_addr(&msgr->my_enc_addr);
+ ceph_encode_banner_addr(&msgr->my_enc_addr);
}
/*
@@ -1732,12 +1734,14 @@ static int read_partial_banner(struct ceph_connection *con)
ret = read_partial(con, end, size, &con->actual_peer_addr);
if (ret <= 0)
goto out;
+ ceph_decode_banner_addr(&con->actual_peer_addr);
size = sizeof (con->peer_addr_for_me);
end += size;
ret = read_partial(con, end, size, &con->peer_addr_for_me);
if (ret <= 0)
goto out;
+ ceph_decode_banner_addr(&con->peer_addr_for_me);
out:
return ret;
@@ -1981,6 +1985,7 @@ int ceph_parse_ips(const char *c, const char *end,
}
addr_set_port(&addr[i], port);
+ addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
@@ -2011,9 +2016,6 @@ static int process_banner(struct ceph_connection *con)
if (verify_hello(con) < 0)
return -1;
- ceph_decode_addr(&con->actual_peer_addr);
- ceph_decode_addr(&con->peer_addr_for_me);
-
/*
* Make sure the other end is who we wanted. note that the other
* end may not yet know their ip address, so if it's 0.0.0.0, give
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 895679d3529b..0520bf9825aa 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -39,7 +39,7 @@ static int __validate_auth(struct ceph_mon_client *monc);
/*
* Decode a monmap blob (e.g., during mount).
*/
-struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
+static struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
{
struct ceph_monmap *m = NULL;
int i, err = -EINVAL;
@@ -50,7 +50,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
ceph_decode_32_safe(&p, end, len, bad);
ceph_decode_need(&p, end, len, bad);
- dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p));
+ dout("monmap_decode %p %p len %d (%d)\n", p, end, len, (int)(end-p));
p += sizeof(u16); /* skip version */
ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
@@ -58,7 +58,6 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
epoch = ceph_decode_32(&p);
num_mon = ceph_decode_32(&p);
- ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad);
if (num_mon > CEPH_MAX_MON)
goto bad;
@@ -68,17 +67,22 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
m->fsid = fsid;
m->epoch = epoch;
m->num_mon = num_mon;
- ceph_decode_copy(&p, m->mon_inst, num_mon*sizeof(m->mon_inst[0]));
- for (i = 0; i < num_mon; i++)
- ceph_decode_addr(&m->mon_inst[i].addr);
-
+ for (i = 0; i < num_mon; ++i) {
+ struct ceph_entity_inst *inst = &m->mon_inst[i];
+
+ /* copy name portion */
+ ceph_decode_copy_safe(&p, end, &inst->name,
+ sizeof(inst->name), bad);
+ err = ceph_decode_entity_addr(&p, end, &inst->addr);
+ if (err)
+ goto bad;
+ }
dout("monmap_decode epoch %d, num_mon %d\n", m->epoch,
m->num_mon);
for (i = 0; i < m->num_mon; i++)
dout("monmap_decode mon%d is %s\n", i,
ceph_pr_addr(&m->mon_inst[i].addr));
return m;
-
bad:
dout("monmap_decode failed with %d\n", err);
kfree(m);
@@ -469,6 +473,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
if (IS_ERR(monmap)) {
pr_err("problem decoding monmap, %d\n",
(int)PTR_ERR(monmap));
+ ceph_msg_dump(msg);
goto out;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 9a8eca5eda65..0b2df09b2554 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -171,14 +171,6 @@ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
osd_data->num_bvecs = num_bvecs;
}
-#define osd_req_op_data(oreq, whch, typ, fld) \
-({ \
- struct ceph_osd_request *__oreq = (oreq); \
- unsigned int __whch = (whch); \
- BUG_ON(__whch >= __oreq->r_num_ops); \
- &__oreq->r_ops[__whch].typ.fld; \
-})
-
static struct ceph_osd_data *
osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
{
@@ -478,7 +470,7 @@ static void request_release_checks(struct ceph_osd_request *req)
{
WARN_ON(!RB_EMPTY_NODE(&req->r_node));
WARN_ON(!RB_EMPTY_NODE(&req->r_mc_node));
- WARN_ON(!list_empty(&req->r_unsafe_item));
+ WARN_ON(!list_empty(&req->r_private_item));
WARN_ON(req->r_osd);
}
@@ -538,7 +530,7 @@ static void request_init(struct ceph_osd_request *req)
init_completion(&req->r_completion);
RB_CLEAR_NODE(&req->r_node);
RB_CLEAR_NODE(&req->r_mc_node);
- INIT_LIST_HEAD(&req->r_unsafe_item);
+ INIT_LIST_HEAD(&req->r_private_item);
target_init(&req->r_t);
}
@@ -4914,20 +4906,26 @@ static int decode_watcher(void **p, void *end, struct ceph_watch_item *item)
ret = ceph_start_decoding(p, end, 2, "watch_item_t",
&struct_v, &struct_len);
if (ret)
- return ret;
+ goto bad;
+
+ ret = -EINVAL;
+ ceph_decode_copy_safe(p, end, &item->name, sizeof(item->name), bad);
+ ceph_decode_64_safe(p, end, item->cookie, bad);
+ ceph_decode_skip_32(p, end, bad); /* skip timeout seconds */
- ceph_decode_copy(p, &item->name, sizeof(item->name));
- item->cookie = ceph_decode_64(p);
- *p += 4; /* skip timeout_seconds */
if (struct_v >= 2) {
- ceph_decode_copy(p, &item->addr, sizeof(item->addr));
- ceph_decode_addr(&item->addr);
+ ret = ceph_decode_entity_addr(p, end, &item->addr);
+ if (ret)
+ goto bad;
+ } else {
+ ret = 0;
}
dout("%s %s%llu cookie %llu addr %s\n", __func__,
ENTITY_NAME(item->name), item->cookie,
ceph_pr_addr(&item->addr));
- return 0;
+bad:
+ return ret;
}
static int decode_watchers(void **p, void *end,
@@ -5044,12 +5042,12 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
const char *class, const char *method,
unsigned int flags,
struct page *req_page, size_t req_len,
- struct page *resp_page, size_t *resp_len)
+ struct page **resp_pages, size_t *resp_len)
{
struct ceph_osd_request *req;
int ret;
- if (req_len > PAGE_SIZE || (resp_page && *resp_len > PAGE_SIZE))
+ if (req_len > PAGE_SIZE)
return -E2BIG;
req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
@@ -5067,8 +5065,8 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
if (req_page)
osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len,
0, false, false);
- if (resp_page)
- osd_req_op_cls_response_data_pages(req, 0, &resp_page,
+ if (resp_pages)
+ osd_req_op_cls_response_data_pages(req, 0, resp_pages,
*resp_len, 0, false, false);
ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
@@ -5079,7 +5077,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
ret = req->r_ops[0].rval;
- if (resp_page)
+ if (resp_pages)
*resp_len = req->r_ops[0].outdata_len;
}
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 48a31dc9161c..90437906b7bc 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1489,11 +1489,9 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
/* osd_state, osd_weight, osd_addrs->client_addr */
ceph_decode_need(p, end, 3*sizeof(u32) +
- map->max_osd*((struct_v >= 5 ? sizeof(u32) :
- sizeof(u8)) +
- sizeof(*map->osd_weight) +
- sizeof(*map->osd_addr)), e_inval);
-
+ map->max_osd*(struct_v >= 5 ? sizeof(u32) :
+ sizeof(u8)) +
+ sizeof(*map->osd_weight), e_inval);
if (ceph_decode_32(p) != map->max_osd)
goto e_inval;
@@ -1514,9 +1512,11 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
if (ceph_decode_32(p) != map->max_osd)
goto e_inval;
- ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr));
- for (i = 0; i < map->max_osd; i++)
- ceph_decode_addr(&map->osd_addr[i]);
+ for (i = 0; i < map->max_osd; i++) {
+ err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]);
+ if (err)
+ goto bad;
+ }
/* pg_temp */
err = decode_pg_temp(p, end, map);
@@ -1618,12 +1618,17 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
void *new_state;
void *new_weight_end;
u32 len;
+ int i;
new_up_client = *p;
ceph_decode_32_safe(p, end, len, e_inval);
- len *= sizeof(u32) + sizeof(struct ceph_entity_addr);
- ceph_decode_need(p, end, len, e_inval);
- *p += len;
+ for (i = 0; i < len; ++i) {
+ struct ceph_entity_addr addr;
+
+ ceph_decode_skip_32(p, end, e_inval);
+ if (ceph_decode_entity_addr(p, end, &addr))
+ goto e_inval;
+ }
new_state = *p;
ceph_decode_32_safe(p, end, len, e_inval);
@@ -1699,9 +1704,9 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_entity_addr addr;
osd = ceph_decode_32(p);
- ceph_decode_copy(p, &addr, sizeof(addr));
- ceph_decode_addr(&addr);
BUG_ON(osd >= map->max_osd);
+ if (ceph_decode_entity_addr(p, end, &addr))
+ goto e_inval;
pr_info("osd%d up\n", osd);
map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
map->osd_addr[osd] = addr;
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 74cafc0142ea..64305e7056a1 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -10,39 +10,6 @@
#include <linux/ceph/libceph.h>
-/*
- * build a vector of user pages
- */
-struct page **ceph_get_direct_page_vector(const void __user *data,
- int num_pages, bool write_page)
-{
- struct page **pages;
- int got = 0;
- int rc = 0;
-
- pages = kmalloc_array(num_pages, sizeof(*pages), GFP_NOFS);
- if (!pages)
- return ERR_PTR(-ENOMEM);
-
- while (got < num_pages) {
- rc = get_user_pages_fast(
- (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
- num_pages - got, write_page ? FOLL_WRITE : 0, pages + got);
- if (rc < 0)
- break;
- BUG_ON(rc == 0);
- got += rc;
- }
- if (rc < 0)
- goto fail;
- return pages;
-
-fail:
- ceph_put_page_vector(pages, got, false);
- return ERR_PTR(rc);
-}
-EXPORT_SYMBOL(ceph_get_direct_page_vector);
-
void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
{
int i;
diff --git a/net/ceph/striper.c b/net/ceph/striper.c
index c36462dc86b7..3b3fa75d1189 100644
--- a/net/ceph/striper.c
+++ b/net/ceph/striper.c
@@ -259,3 +259,20 @@ int ceph_extent_to_file(struct ceph_file_layout *l,
return 0;
}
EXPORT_SYMBOL(ceph_extent_to_file);
+
+u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size)
+{
+ u64 period = (u64)l->stripe_count * l->object_size;
+ u64 num_periods = DIV64_U64_ROUND_UP(size, period);
+ u64 remainder_bytes;
+ u64 remainder_objs = 0;
+
+ div64_u64_rem(size, period, &remainder_bytes);
+ if (remainder_bytes > 0 &&
+ remainder_bytes < (u64)l->stripe_count * l->stripe_unit)
+ remainder_objs = l->stripe_count -
+ DIV_ROUND_UP_ULL(remainder_bytes, l->stripe_unit);
+
+ return num_periods * l->stripe_count - remainder_objs;
+}
+EXPORT_SYMBOL(ceph_get_num_objects);
diff --git a/net/compat.c b/net/compat.c
index 3f9ce609397f..0f7ded26059e 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -80,9 +80,10 @@ int get_compat_msghdr(struct msghdr *kmsg,
kmsg->msg_iocb = NULL;
- return compat_import_iovec(save_addr ? READ : WRITE,
+ err = compat_import_iovec(save_addr ? READ : WRITE,
compat_ptr(msg.msg_iov), msg.msg_iovlen,
UIO_FASTIOV, iov, &kmsg->msg_iter);
+ return err < 0 ? err : 0;
}
/* Bleech... */
diff --git a/net/core/dev.c b/net/core/dev.c
index fc676b2610e3..0891f499c1bb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4374,12 +4374,17 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
act = bpf_prog_run_xdp(xdp_prog, xdp);
+ /* check if bpf_xdp_adjust_head was used */
off = xdp->data - orig_data;
- if (off > 0)
- __skb_pull(skb, off);
- else if (off < 0)
- __skb_push(skb, -off);
- skb->mac_header += off;
+ if (off) {
+ if (off > 0)
+ __skb_pull(skb, off);
+ else if (off < 0)
+ __skb_push(skb, -off);
+
+ skb->mac_header += off;
+ skb_reset_network_header(skb);
+ }
/* check if bpf_xdp_adjust_tail was used. it can only "shrink"
* pckt.
@@ -9701,6 +9706,8 @@ static void __net_exit default_device_exit(struct net *net)
/* Push remaining network devices to init_net */
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
+ if (__dev_get_by_name(&init_net, fb_name))
+ snprintf(fb_name, IFNAMSIZ, "dev%%d");
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
pr_emerg("%s: failed to move %s to init_net: %d\n",
diff --git a/net/core/filter.c b/net/core/filter.c
index 47f6386fb17a..7878f918b8c0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4335,7 +4335,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
ret = tcp_set_congestion_control(sk, name, false,
- reinit);
+ reinit, true);
} else {
struct tcp_sock *tp = tcp_sk(sk);
@@ -6884,20 +6884,30 @@ static bool sock_addr_is_valid_access(int off, int size,
case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
msg_src_ip6[3]):
- /* Only narrow read access allowed for now. */
if (type == BPF_READ) {
bpf_ctx_record_field_size(info, size_default);
+
+ if (bpf_ctx_wide_access_ok(off, size,
+ struct bpf_sock_addr,
+ user_ip6))
+ return true;
+
+ if (bpf_ctx_wide_access_ok(off, size,
+ struct bpf_sock_addr,
+ msg_src_ip6))
+ return true;
+
if (!bpf_ctx_narrow_access_ok(off, size, size_default))
return false;
} else {
- if (bpf_ctx_wide_store_ok(off, size,
- struct bpf_sock_addr,
- user_ip6))
+ if (bpf_ctx_wide_access_ok(off, size,
+ struct bpf_sock_addr,
+ user_ip6))
return true;
- if (bpf_ctx_wide_store_ok(off, size,
- struct bpf_sock_addr,
- msg_src_ip6))
+ if (bpf_ctx_wide_access_ok(off, size,
+ struct bpf_sock_addr,
+ msg_src_ip6))
return true;
if (size != size_default)
@@ -7445,12 +7455,12 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, gso_segs):
/* si->dst_reg = skb_shinfo(SKB); */
#ifdef NET_SKBUFF_DATA_USES_OFFSET
- *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
- si->dst_reg, si->src_reg,
- offsetof(struct sk_buff, head));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
BPF_REG_AX, si->src_reg,
offsetof(struct sk_buff, end));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, head));
*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
#else
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 76f8db3841d7..d63b970784dc 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -165,7 +165,7 @@ void flow_rule_match_enc_opts(const struct flow_rule *rule,
}
EXPORT_SYMBOL(flow_rule_match_enc_opts);
-struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
+struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv))
{
@@ -175,7 +175,6 @@ struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
if (!block_cb)
return ERR_PTR(-ENOMEM);
- block_cb->net = net;
block_cb->cb = cb;
block_cb->cb_ident = cb_ident;
block_cb->cb_priv = cb_priv;
@@ -194,14 +193,13 @@ void flow_block_cb_free(struct flow_block_cb *block_cb)
}
EXPORT_SYMBOL(flow_block_cb_free);
-struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *f,
- tc_setup_cb_t *cb, void *cb_ident)
+struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block,
+ flow_setup_cb_t *cb, void *cb_ident)
{
struct flow_block_cb *block_cb;
- list_for_each_entry(block_cb, f->driver_block_list, driver_list) {
- if (block_cb->net == f->net &&
- block_cb->cb == cb &&
+ list_for_each_entry(block_cb, &block->cb_list, list) {
+ if (block_cb->cb == cb &&
block_cb->cb_ident == cb_ident)
return block_cb;
}
@@ -228,7 +226,7 @@ unsigned int flow_block_cb_decref(struct flow_block_cb *block_cb)
}
EXPORT_SYMBOL(flow_block_cb_decref);
-bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident,
+bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident,
struct list_head *driver_block_list)
{
struct flow_block_cb *block_cb;
@@ -245,7 +243,8 @@ EXPORT_SYMBOL(flow_block_cb_is_busy);
int flow_block_cb_setup_simple(struct flow_block_offload *f,
struct list_head *driver_block_list,
- tc_setup_cb_t *cb, void *cb_ident, void *cb_priv,
+ flow_setup_cb_t *cb,
+ void *cb_ident, void *cb_priv,
bool ingress_only)
{
struct flow_block_cb *block_cb;
@@ -261,8 +260,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
if (flow_block_cb_is_busy(cb, cb_ident, driver_block_list))
return -EBUSY;
- block_cb = flow_block_cb_alloc(f->net, cb, cb_ident,
- cb_priv, NULL);
+ block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, NULL);
if (IS_ERR(block_cb))
return PTR_ERR(block_cb);
@@ -270,7 +268,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
list_add_tail(&block_cb->driver_list, driver_block_list);
return 0;
case FLOW_BLOCK_UNBIND:
- block_cb = flow_block_cb_lookup(f, cb, cb_ident);
+ block_cb = flow_block_cb_lookup(f->block, cb, cb_ident);
if (!block_cb)
return -ENOENT;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 742cea4ce72e..f79e61c570ea 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1124,6 +1124,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
atomic_set(&neigh->probes,
NEIGH_VAR(neigh->parms, UCAST_PROBES));
+ neigh_del_timer(neigh);
neigh->nud_state = NUD_INCOMPLETE;
neigh->updated = now;
next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
@@ -1140,6 +1141,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
}
} else if (neigh->nud_state & NUD_STALE) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
+ neigh_del_timer(neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_add_timer(neigh, jiffies +
@@ -3374,8 +3376,6 @@ void neigh_app_ns(struct neighbour *n)
EXPORT_SYMBOL(neigh_app_ns);
#ifdef CONFIG_SYSCTL
-static int zero;
-static int int_max = INT_MAX;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
static int proc_unres_qlen(struct ctl_table *ctl, int write,
@@ -3384,7 +3384,7 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write,
int size, ret;
struct ctl_table tmp = *ctl;
- tmp.extra1 = &zero;
+ tmp.extra1 = SYSCTL_ZERO;
tmp.extra2 = &unres_qlen_max;
tmp.data = &size;
@@ -3449,8 +3449,8 @@ static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
struct ctl_table tmp = *ctl;
int ret;
- tmp.extra1 = &zero;
- tmp.extra2 = &int_max;
+ tmp.extra1 = SYSCTL_ZERO;
+ tmp.extra2 = SYSCTL_INT_MAX;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
neigh_proc_update(ctl, write);
@@ -3595,24 +3595,24 @@ static struct neigh_sysctl_table {
.procname = "gc_thresh1",
.maxlen = sizeof(int),
.mode = 0644,
- .extra1 = &zero,
- .extra2 = &int_max,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
[NEIGH_VAR_GC_THRESH2] = {
.procname = "gc_thresh2",
.maxlen = sizeof(int),
.mode = 0644,
- .extra1 = &zero,
- .extra2 = &int_max,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
[NEIGH_VAR_GC_THRESH3] = {
.procname = "gc_thresh3",
.maxlen = sizeof(int),
.mode = 0644,
- .extra1 = &zero,
- .extra2 = &int_max,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
.proc_handler = proc_dointvec_minmax,
},
{},
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6f1e31f674a3..0338820ee0ec 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -762,7 +762,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
printk("%sdev name=%s feat=0x%pNF\n",
level, dev->name, &dev->features);
if (sk)
- printk("%ssk family=%hu type=%hu proto=%hu\n",
+ printk("%ssk family=%hu type=%u proto=%u\n",
level, sk->sk_family, sk->sk_type, sk->sk_protocol);
if (full_pkt && headroom)
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 93bffaad2135..6832eeb4b785 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -585,12 +585,12 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
- rcu_assign_sk_user_data(sk, NULL);
sk_psock_cork_free(psock);
sk_psock_zap_ingress(psock);
- sk_psock_restore_proto(sk, psock);
write_lock_bh(&sk->sk_callback_lock);
+ sk_psock_restore_proto(sk, psock);
+ rcu_assign_sk_user_data(sk, NULL);
if (psock->progs.skb_parser)
sk_psock_stop_strp(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/core/sock.c b/net/core/sock.c
index 3e073ca6138f..6d08553f885c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1597,7 +1597,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
if (!sk)
return sk;
- if (priority & __GFP_ZERO)
+ if (want_init_on_alloc(priority))
sk_prot_clear_nulls(sk, prot->obj_size);
} else
sk = kmalloc(prot->obj_size, priority);
@@ -1992,6 +1992,19 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
}
EXPORT_SYMBOL(skb_set_owner_w);
+static bool can_skb_orphan_partial(const struct sk_buff *skb)
+{
+#ifdef CONFIG_TLS_DEVICE
+ /* Drivers depend on in-order delivery for crypto offload,
+ * partial orphan breaks out-of-order-OK logic.
+ */
+ if (skb->decrypted)
+ return false;
+#endif
+ return (skb->destructor == sock_wfree ||
+ (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
+}
+
/* This helper is used by netem, as it can hold packets in its
* delay queue. We want to allow the owner socket to send more
* packets, as if they were already TX completed by a typical driver.
@@ -2003,11 +2016,7 @@ void skb_orphan_partial(struct sk_buff *skb)
if (skb_is_tcp_pure_ack(skb))
return;
- if (skb->destructor == sock_wfree
-#ifdef CONFIG_INET
- || skb->destructor == tcp_wfree
-#endif
- ) {
+ if (can_skb_orphan_partial(skb)) {
struct sock *sk = skb->sk;
if (refcount_inc_not_zero(&sk->sk_refcnt)) {
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 3312a5849a97..c13ffbd33d8d 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -19,6 +19,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;
+static atomic64_t cookie_gen;
u64 sock_gen_cookie(struct sock *sk)
{
@@ -27,7 +28,7 @@ u64 sock_gen_cookie(struct sock *sk)
if (res)
return res;
- res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
+ res = atomic64_inc_return(&cookie_gen);
atomic64_cmpxchg(&sk->sk_cookie, 0, res);
}
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 52d4faeee18b..1330a7442e5b 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -247,6 +247,8 @@ static void sock_map_free(struct bpf_map *map)
raw_spin_unlock_bh(&stab->lock);
rcu_read_unlock();
+ synchronize_rcu();
+
bpf_map_area_free(stab->sks);
kfree(stab);
}
@@ -276,16 +278,20 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock **psk)
{
struct sock *sk;
+ int err = 0;
raw_spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
- *psk = NULL;
+ sk = xchg(psk, NULL);
+
+ if (likely(sk))
+ sock_map_unref(sk, psk);
+ else
+ err = -EINVAL;
+
raw_spin_unlock_bh(&stab->lock);
- if (unlikely(!sk))
- return -EINVAL;
- sock_map_unref(sk, psk);
- return 0;
+ return err;
}
static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk,
@@ -328,6 +334,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
struct sock *sk, u64 flags)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_psock_link *link;
struct sk_psock *psock;
struct sock *osk;
@@ -338,6 +345,8 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
return -EINVAL;
if (unlikely(idx >= map->max_entries))
return -E2BIG;
+ if (unlikely(icsk->icsk_ulp_data))
+ return -EINVAL;
link = sk_psock_init_link();
if (!link)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f9204719aeee..8da5b3a54dac 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -22,8 +22,6 @@
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
-static int zero = 0;
-static int one = 1;
static int two __maybe_unused = 2;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
@@ -390,10 +388,10 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
- .extra1 = &one,
- .extra2 = &one,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_ONE,
# else
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
# endif
},
@@ -404,7 +402,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = proc_dointvec_minmax_bpf_restricted,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
{
@@ -413,8 +411,8 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = proc_dointvec_minmax_bpf_restricted,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
# endif
{
@@ -461,8 +459,8 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
#ifdef CONFIG_RPS
{
@@ -493,7 +491,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "busy_read",
@@ -501,7 +499,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
#endif
#ifdef CONFIG_NET_SCHED
@@ -533,7 +531,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &max_skb_frags,
},
{
@@ -542,7 +540,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "fb_tunnels_only_for_init_net",
@@ -550,8 +548,8 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "devconf_inherit_init_net",
@@ -559,7 +557,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
{
@@ -578,7 +576,7 @@ static struct ctl_table netns_core_table[] = {
.data = &init_net.core.sysctl_somaxconn,
.maxlen = sizeof(int),
.mode = 0644,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.proc_handler = proc_dointvec_minmax
},
{ }
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index b59040f268a9..ee8d4f5afa72 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -16,9 +16,7 @@
#endif
/* Boundary values */
-static int zero = 0,
- one = 1,
- u8_max = 0xFF;
+static int u8_max = 0xFF;
static unsigned long seqw_min = DCCPF_SEQ_WMIN,
seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */
@@ -38,7 +36,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_rx_ccid),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &u8_max, /* RFC 4340, 10. */
},
{
@@ -47,7 +45,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_tx_ccid),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &u8_max, /* RFC 4340, 10. */
},
{
@@ -56,7 +54,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_request_retries),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &u8_max,
},
{
@@ -65,7 +63,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_retries1),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &u8_max,
},
{
@@ -74,7 +72,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_retries2),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &u8_max,
},
{
@@ -83,7 +81,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_tx_qlen),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "sync_ratelimit",
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 614c38ece104..33f41178afcc 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -951,7 +951,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
struct flow_block_offload *f)
{
struct flow_block_cb *block_cb;
- tc_setup_cb_t *cb;
+ flow_setup_cb_t *cb;
if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
cb = dsa_slave_setup_tc_block_cb_ig;
@@ -967,7 +967,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
if (flow_block_cb_is_busy(cb, dev, &dsa_slave_block_cb_list))
return -EBUSY;
- block_cb = flow_block_cb_alloc(f->net, cb, dev, dev, NULL);
+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
if (IS_ERR(block_cb))
return PTR_ERR(block_cb);
@@ -975,7 +975,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
list_add_tail(&block_cb->driver_list, &dsa_slave_block_cb_list);
return 0;
case FLOW_BLOCK_UNBIND:
- block_cb = flow_block_cb_lookup(f, cb, dev);
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
if (!block_cb)
return -ENOENT;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 4ec5b7f85d51..09d9286b27cc 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -153,6 +153,9 @@ static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds,
{
int port;
+ if (!ds->ops->port_mdb_add)
+ return;
+
for_each_set_bit(port, bitmap, ds->num_ports)
ds->ops->port_mdb_add(ds, port, mdb);
}
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 1d96c9d4a8e9..47ee88163a9d 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -165,6 +165,7 @@ static struct sk_buff
"Expected meta frame, is %12llx "
"in the DSA master multicast filter?\n",
SJA1105_META_DMAC);
+ kfree_skb(sp->data->stampable_skb);
}
/* Hold a reference to avoid dsa_switch_rcv
@@ -211,16 +212,8 @@ static struct sk_buff
* for further processing up the network stack.
*/
kfree_skb(skb);
-
- skb = skb_copy(stampable_skb, GFP_ATOMIC);
- if (!skb) {
- dev_err_ratelimited(dp->ds->dev,
- "Failed to copy stampable skb\n");
- return NULL;
- }
+ skb = stampable_skb;
sja1105_transfer_meta(skb, meta);
- /* The cached copy will be freed now */
- skb_unref(stampable_skb);
spin_unlock(&sp->data->meta_lock);
}
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index f0f9b493c47b..f509b495451a 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -227,13 +227,8 @@ static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
struct hsr_port *master;
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
- if (master) {
- skb->dev = master->dev;
- hsr_forward_skb(skb, master);
- } else {
- atomic_long_inc(&dev->tx_dropped);
- dev_kfree_skb_any(skb);
- }
+ skb->dev = master->dev;
+ hsr_forward_skb(skb, master);
return NETDEV_TX_OK;
}
@@ -348,7 +343,11 @@ static void hsr_announce(struct timer_list *t)
rcu_read_unlock();
}
-void hsr_dev_destroy(struct net_device *hsr_dev)
+/* This has to be called after all the readers are gone.
+ * Otherwise we would have to check the return value of
+ * hsr_port_get_hsr().
+ */
+static void hsr_dev_destroy(struct net_device *hsr_dev)
{
struct hsr_priv *hsr;
struct hsr_port *port;
@@ -364,8 +363,6 @@ void hsr_dev_destroy(struct net_device *hsr_dev)
del_timer_sync(&hsr->prune_timer);
del_timer_sync(&hsr->announce_timer);
- synchronize_rcu();
-
hsr_del_self_node(&hsr->self_node_db);
hsr_del_nodes(&hsr->node_db);
}
@@ -376,6 +373,7 @@ static const struct net_device_ops hsr_device_ops = {
.ndo_stop = hsr_dev_close,
.ndo_start_xmit = hsr_dev_xmit,
.ndo_fix_features = hsr_fix_features,
+ .ndo_uninit = hsr_dev_destroy,
};
static struct device_type hsr_type = {
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
index d0fa6b0696d2..6d7759c4f5f9 100644
--- a/net/hsr/hsr_device.h
+++ b/net/hsr/hsr_device.h
@@ -14,7 +14,6 @@
void hsr_dev_setup(struct net_device *dev);
int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
unsigned char multicast_spec, u8 protocol_version);
-void hsr_dev_destroy(struct net_device *hsr_dev);
void hsr_check_carrier_and_operstate(struct hsr_priv *hsr);
bool is_hsr_master(struct net_device *dev);
int hsr_get_max_mtu(struct hsr_priv *hsr);
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 160edd24de4e..8f8337f893ba 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -69,12 +69,6 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev,
return hsr_dev_finalize(dev, link, multicast_spec, hsr_version);
}
-static void hsr_dellink(struct net_device *hsr_dev, struct list_head *head)
-{
- hsr_dev_destroy(hsr_dev);
- unregister_netdevice_queue(hsr_dev, head);
-}
-
static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct hsr_priv *hsr;
@@ -119,7 +113,6 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = {
.priv_size = sizeof(struct hsr_priv),
.setup = hsr_dev_setup,
.newlink = hsr_newlink,
- .dellink = hsr_dellink,
.fill_info = hsr_fill_info,
};
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index e4aba5d485be..bbe9b3b2d395 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -170,7 +170,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb,
reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
if (!reasm_data)
goto out_oom;
- inet_frag_reasm_finish(&fq->q, skb, reasm_data);
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data, false);
skb->dev = ldev;
skb->tstamp = fq->q.stamp;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 317339cd7f03..e8bc939b56dd 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -388,6 +388,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fib_combine_itag(itag, &res);
dev_match = fib_info_nh_uses_dev(res.fi, dev);
+ /* This is not common, loopback packets retain skb_dst so normally they
+ * would not even hit this slow path.
+ */
+ dev_match = dev_match || (res.type == RTN_LOCAL &&
+ dev == net->loopback_dev);
if (dev_match) {
ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST;
return ret;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index d666756be5f1..10d31733297d 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -331,7 +331,7 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
if (!prev)
fq = inet_frag_create(fqdir, key, &prev);
- if (prev && !IS_ERR(prev)) {
+ if (!IS_ERR_OR_NULL(prev)) {
fq = prev;
if (!refcount_inc_not_zero(&fq->refcnt))
fq = NULL;
@@ -475,11 +475,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
EXPORT_SYMBOL(inet_frag_reasm_prepare);
void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
- void *reasm_data)
+ void *reasm_data, bool try_coalesce)
{
struct sk_buff **nextp = (struct sk_buff **)reasm_data;
struct rb_node *rbn;
struct sk_buff *fp;
+ int sum_truesize;
skb_push(head, head->data - skb_network_header(head));
@@ -487,25 +488,41 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
fp = FRAG_CB(head)->next_frag;
rbn = rb_next(&head->rbnode);
rb_erase(&head->rbnode, &q->rb_fragments);
+
+ sum_truesize = head->truesize;
while (rbn || fp) {
/* fp points to the next sk_buff in the current run;
* rbn points to the next run.
*/
/* Go through the current run. */
while (fp) {
- *nextp = fp;
- nextp = &fp->next;
- fp->prev = NULL;
- memset(&fp->rbnode, 0, sizeof(fp->rbnode));
- fp->sk = NULL;
- head->data_len += fp->len;
- head->len += fp->len;
+ struct sk_buff *next_frag = FRAG_CB(fp)->next_frag;
+ bool stolen;
+ int delta;
+
+ sum_truesize += fp->truesize;
if (head->ip_summed != fp->ip_summed)
head->ip_summed = CHECKSUM_NONE;
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
- fp = FRAG_CB(fp)->next_frag;
+
+ if (try_coalesce && skb_try_coalesce(head, fp, &stolen,
+ &delta)) {
+ kfree_skb_partial(fp, stolen);
+ } else {
+ fp->prev = NULL;
+ memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+ fp->sk = NULL;
+
+ head->data_len += fp->len;
+ head->len += fp->len;
+ head->truesize += fp->truesize;
+
+ *nextp = fp;
+ nextp = &fp->next;
+ }
+
+ fp = next_frag;
}
/* Move to the next run. */
if (rbn) {
@@ -516,7 +533,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
rbn = rbnext;
}
}
- sub_frag_mem_limit(q->fqdir, head->truesize);
+ sub_frag_mem_limit(q->fqdir, sum_truesize);
*nextp = NULL;
skb_mark_not_on_list(head);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 4385eb9e781f..cfeb8890f94e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -393,6 +393,11 @@ err:
return err;
}
+static bool ip_frag_coalesce_ok(const struct ipq *qp)
+{
+ return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER;
+}
+
/* Build a new IP datagram from all its fragments. */
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
struct sk_buff *prev_tail, struct net_device *dev)
@@ -421,7 +426,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
if (len > 65535)
goto out_oversize;
- inet_frag_reasm_finish(&qp->q, skb, reasm_data);
+ inet_frag_reasm_finish(&qp->q, skb, reasm_data,
+ ip_frag_coalesce_ok(qp));
skb->dev = dev;
IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 43adfc1641ba..2f01cf6fa0de 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -275,6 +275,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
const struct iphdr *tiph = &tunnel->parms.iph;
u8 ipproto;
+ if (!pskb_inet_may_pull(skb))
+ goto tx_error;
+
switch (skb->protocol) {
case htons(ETH_P_IP):
ipproto = IPPROTO_IPIP;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4d6bf7ac0792..6bdb1ab8af61 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -416,8 +416,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
ctinfo == IP_CT_RELATED_REPLY))
return XT_CONTINUE;
- /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
- * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
+ /* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO,
+ * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here
* on, which all have an ID field [relevant for hashing]. */
hash = clusterip_hashfn(skb, cipinfo->config);
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 8e7f84ec783d..0e70f3f65f6f 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -36,6 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
opts.options |= XT_SYNPROXY_OPT_ECN;
opts.options &= info->options;
+ opts.mss_encode = opts.mss;
+ opts.mss = info->mss;
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, &opts);
else
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 59031670b16a..cc23f1ce239c 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -78,6 +78,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
flow.flowi4_tos = RT_TOS(iph->tos);
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+ flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par));
return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 87b711fd5a44..3e2685c120c7 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -221,11 +221,11 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
int ret;
rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
- ret = nf_ct_expect_related(rtp_exp);
+ ret = nf_ct_expect_related(rtp_exp, 0);
if (ret == 0) {
rtcp_exp->tuple.dst.u.udp.port =
htons(nated_port + 1);
- ret = nf_ct_expect_related(rtcp_exp);
+ ret = nf_ct_expect_related(rtcp_exp, 0);
if (ret == 0)
break;
else if (ret == -EBUSY) {
@@ -296,7 +296,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
@@ -352,7 +352,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
@@ -444,7 +444,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
@@ -537,7 +537,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7d66306b5f39..0b980e841927 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -28,8 +28,6 @@
#include <net/protocol.h>
#include <net/netevent.h>
-static int zero;
-static int one = 1;
static int two = 2;
static int four = 4;
static int thousand = 1000;
@@ -576,7 +574,7 @@ static struct ctl_table ipv4_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "icmp_msgs_burst",
@@ -584,7 +582,7 @@ static struct ctl_table ipv4_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "udp_mem",
@@ -674,8 +672,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{
@@ -763,8 +761,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = ipv4_fwd_update_priority,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "ip_nonlocal_bind",
@@ -794,8 +792,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{
@@ -864,7 +862,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one
+ .extra1 = SYSCTL_ONE
},
#endif
{
@@ -969,7 +967,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
{
@@ -1011,7 +1009,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_tfo_blackhole_detect_timeout,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
@@ -1020,8 +1018,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "fib_multipath_hash_policy",
@@ -1029,8 +1027,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_fib_multipath_hash_policy,
- .extra1 = &zero,
- .extra2 = &two,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{
@@ -1047,8 +1045,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
#endif
{
@@ -1078,7 +1076,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &four,
},
{
@@ -1222,7 +1220,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &gso_max_segs,
},
{
@@ -1231,7 +1229,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &one_day_secs
},
{
@@ -1240,8 +1238,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "tcp_invalid_ratelimit",
@@ -1256,7 +1254,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &thousand,
},
{
@@ -1265,7 +1263,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &thousand,
},
{
@@ -1274,7 +1272,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "tcp_rmem",
@@ -1282,7 +1280,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "tcp_comp_sack_delay_ns",
@@ -1297,7 +1295,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &comp_sack_nr_max,
},
{
@@ -1306,7 +1304,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one
+ .extra1 = SYSCTL_ONE
},
{
.procname = "udp_wmem_min",
@@ -1314,7 +1312,7 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(init_net.ipv4.sysctl_udp_wmem_min),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one
+ .extra1 = SYSCTL_ONE
},
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7846afacdf0b..77b485d60b9d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -984,6 +984,9 @@ new_segment:
if (!skb)
goto wait_for_memory;
+#ifdef CONFIG_TLS_DEVICE
+ skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+#endif
skb_entail(sk, skb);
copy = size_goal;
}
@@ -2785,7 +2788,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
name[val] = 0;
lock_sock(sk);
- err = tcp_set_congestion_control(sk, name, true, true);
+ err = tcp_set_congestion_control(sk, name, true, true,
+ ns_capable(sock_net(sk)->user_ns,
+ CAP_NET_ADMIN));
release_sock(sk);
return err;
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 3d1e15401384..8a56e09cfb0e 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -398,10 +398,14 @@ more_data:
static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
struct sk_msg tmp, *msg_tx = NULL;
- int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
int copied = 0, err = 0;
struct sk_psock *psock;
long timeo;
+ int flags;
+
+ /* Don't let internal do_tcp_sendpages() flags through */
+ flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED);
+ flags |= MSG_NO_SHARED_FRAGS;
psock = sk_psock_get(sk);
if (unlikely(!psock))
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index e1862b64a90f..c445a81d144e 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -333,7 +333,8 @@ out:
* tcp_reinit_congestion_control (if the current congestion control was
* already initialized.
*/
-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
+ bool reinit, bool cap_net_admin)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_congestion_ops *ca;
@@ -369,8 +370,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo
} else {
err = -EBUSY;
}
- } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
- ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
+ } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) {
err = -EPERM;
} else if (!try_module_get(ca->owner)) {
err = -EBUSY;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4af1f5dae9d3..979520e46e33 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1288,6 +1288,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
int nsize, old_factor;
+ long limit;
int nlen;
u8 flags;
@@ -1298,8 +1299,16 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
if (nsize < 0)
nsize = 0;
- if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf &&
- tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) {
+ /* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
+ * We need some allowance to not penalize applications setting small
+ * SO_SNDBUF values.
+ * Also allow first and last skb in retransmit queue to be split.
+ */
+ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+ if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
+ tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
+ skb != tcp_rtx_queue_head(sk) &&
+ skb != tcp_rtx_queue_tail(sk))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
return -ENOMEM;
}
@@ -1311,6 +1320,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
if (!buff)
return -ENOMEM; /* We'll just try again later. */
+ skb_copy_decrypted(buff, skb);
sk->sk_wmem_queued += buff->truesize;
sk_mem_charge(sk, buff->truesize);
@@ -1865,6 +1875,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
buff = sk_stream_alloc_skb(sk, 0, gfp, true);
if (unlikely(!buff))
return -ENOMEM;
+ skb_copy_decrypted(buff, skb);
sk->sk_wmem_queued += buff->truesize;
sk_mem_charge(sk, buff->truesize);
@@ -2134,6 +2145,7 @@ static int tcp_mtu_probe(struct sock *sk)
sk_mem_charge(sk, nskb->truesize);
skb = tcp_send_head(sk);
+ skb_copy_decrypted(nskb, skb);
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 3d8a1d835471..4849edb62d52 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -96,6 +96,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen)
rcu_read_unlock();
}
+void tcp_update_ulp(struct sock *sk, struct proto *proto)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (!icsk->icsk_ulp_ops) {
+ sk->sk_prot = proto;
+ return;
+ }
+
+ if (icsk->icsk_ulp_ops->update)
+ icsk->icsk_ulp_ops->update(sk, proto);
+}
+
void tcp_cleanup_ulp(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c21862ba9c02..d88821c794fb 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2170,7 +2170,7 @@ start_lookup:
/* Initialize UDP checksum. If exited with zero value (success),
* CHECKSUM_UNNECESSARY means, that no more checks are required.
- * Otherwise, csum completion requires chacksumming packet body,
+ * Otherwise, csum completion requires checksumming packet body,
* including udp header and folding it to skb->csum.
*/
static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 521e3203e83a..dc73888c7859 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6432,8 +6432,6 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
}
static int minus_one = -1;
-static const int zero = 0;
-static const int one = 1;
static const int two_five_five = 255;
static const struct ctl_table addrconf_sysctl[] = {
@@ -6450,7 +6448,7 @@ static const struct ctl_table addrconf_sysctl[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&one,
+ .extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&two_five_five,
},
{
@@ -6809,7 +6807,7 @@ static const struct ctl_table addrconf_sysctl[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&zero,
+ .extra1 = (void *)SYSCTL_ZERO,
.extra2 = (void *)&two_five_five,
},
{
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 25e1172fd1c3..95835e8d99aa 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -464,7 +464,7 @@ static void ah6_input_done(struct crypto_async_request *base, int err)
struct ah_data *ahp = x->data;
struct ip_auth_hdr *ah = ip_auth_hdr(skb);
int hdr_len = skb_network_header_len(skb);
- int ah_hlen = (ah->hdrlen + 2) << 2;
+ int ah_hlen = ipv6_authlen(ah);
if (err)
goto out;
@@ -546,7 +546,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
ahash = ahp->ahash;
nexthdr = ah->nexthdr;
- ah_hlen = (ah->hdrlen + 2) << 2;
+ ah_hlen = ipv6_authlen(ah);
if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) &&
ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len))
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9d78c907b918..9ab897ded4df 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -74,7 +74,7 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) {
flowlabel = fl6_sock_lookup(sk, np->flow_label);
- if (!flowlabel)
+ if (IS_ERR(flowlabel))
return -EINVAL;
}
ip6_datagram_flow_key_init(&fl6, sk);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 11a43ee4dd45..b358f1a4dd08 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -266,7 +266,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
} else if (nexthdr == NEXTHDR_AUTH) {
if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0))
break;
- hdrlen = (hp->hdrlen + 2) << 2;
+ hdrlen = ipv6_authlen(hp);
} else
hdrlen = ipv6_optlen(hp);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 49884f96232b..87f47bc55c5e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1151,8 +1151,24 @@ add:
err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_ADD,
rt, extack);
- if (err)
+ if (err) {
+ struct fib6_info *sibling, *next_sibling;
+
+ /* If the route has siblings, then it first
+ * needs to be unlinked from them.
+ */
+ if (!rt->fib6_nsiblings)
+ return err;
+
+ list_for_each_entry_safe(sibling, next_sibling,
+ &rt->fib6_siblings,
+ fib6_siblings)
+ sibling->fib6_nsiblings--;
+ rt->fib6_nsiblings = 0;
+ list_del_init(&rt->fib6_siblings);
+ rt6_multipath_rebalance(next_sibling);
return err;
+ }
}
rcu_assign_pointer(rt->fib6_next, iter);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index ad284b1fd308..d64b83e85642 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -435,8 +435,6 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
}
fl->dst = freq->flr_dst;
atomic_set(&fl->users, 1);
- if (fl_shared_exclusive(fl) || fl->opt)
- static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
switch (fl->share) {
case IPV6_FL_S_EXCL:
case IPV6_FL_S_ANY:
@@ -451,10 +449,15 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
err = -EINVAL;
goto done;
}
+ if (fl_shared_exclusive(fl) || fl->opt)
+ static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
return fl;
done:
- fl_free(fl);
+ if (fl) {
+ kfree(fl->opt);
+ kfree(fl);
+ }
*err_p = err;
return NULL;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c2049c72f3e5..dd2d0b963260 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -660,12 +660,13 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
struct flowi6 *fl6, __u8 *dsfield,
int *encap_limit)
{
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ipv6hdr *ipv6h;
struct ip6_tnl *t = netdev_priv(dev);
__u16 offset;
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+ ipv6h = ipv6_hdr(skb);
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b80fde1bc005..754a484d35df 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -416,7 +416,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
break;
optlen = 8;
} else if (nexthdr == NEXTHDR_AUTH) {
- optlen = (hdr->hdrlen + 2) << 2;
+ optlen = ipv6_authlen(hdr);
} else {
optlen = ipv6_optlen(hdr);
}
@@ -1278,12 +1278,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
}
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
- dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
-
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1367,12 +1366,11 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
}
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
- dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
-
skb_set_inner_ipproto(skb, IPPROTO_IPV6);
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index e77ea1ed5edd..5cdb4a69d277 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -36,6 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
opts.options |= XT_SYNPROXY_OPT_ECN;
opts.options &= info->options;
+ opts.mss_encode = opts.mss;
+ opts.mss = info->mss;
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, &opts);
else
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 0228ff3636bb..4e15a14435e4 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -55,7 +55,7 @@ static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par)
return false;
}
- hdrlen = (ah->hdrlen + 2) << 2;
+ hdrlen = ipv6_authlen(ah);
pr_debug("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
pr_debug("RES %04X ", ah->reserved);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index fd439f88377f..0fc6326ef499 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -71,7 +71,7 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
if (nexthdr == NEXTHDR_FRAGMENT)
hdrlen = 8;
else if (nexthdr == NEXTHDR_AUTH)
- hdrlen = (hp->hdrlen + 2) << 2;
+ hdrlen = ipv6_authlen(hp);
else
hdrlen = ipv6_optlen(hp);
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 6bcaf7357183..d800801a5dd2 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -55,7 +55,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
if (rpfilter_addr_linklocal(&iph->saddr)) {
lookup_flags |= RT6_LOOKUP_F_IFACE;
fl6.flowi6_oif = dev->ifindex;
- } else if ((flags & XT_RPFILTER_LOOSE) == 0)
+ /* Set flowi6_oif for vrf devices to lookup route in l3mdev domain. */
+ } else if (netif_is_l3_master(dev) || netif_is_l3_slave(dev) ||
+ (flags & XT_RPFILTER_LOOSE) == 0)
fl6.flowi6_oif = dev->ifindex;
rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
@@ -70,7 +72,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
goto out;
}
- if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
+ if (rt->rt6i_idev->dev == dev ||
+ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex ||
+ (flags & XT_RPFILTER_LOOSE))
ret = true;
out:
ip6_rt_put(rt);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 398e1df41406..fed9666a2f7d 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -348,7 +348,7 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
skb_reset_transport_header(skb);
- inet_frag_reasm_finish(&fq->q, skb, reasm_data);
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data, false);
skb->ignore_df = 1;
skb->dev = dev;
@@ -414,7 +414,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
BUG();
if (nexthdr == NEXTHDR_AUTH)
- hdrlen = (hdr.hdrlen+2)<<2;
+ hdrlen = ipv6_authlen(&hdr);
else
hdrlen = ipv6_optlen(&hdr);
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 549c51156d5d..f53bd8f01219 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -155,7 +155,7 @@ static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
}
- hdrlen = (hp->hdrlen+2)<<2;
+ hdrlen = ipv6_authlen(hp);
break;
case IPPROTO_ESP:
if (logflags & NF_LOG_IPOPT) {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index ca05b16f1bb9..1f5d4d196dcc 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -282,7 +282,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
skb_reset_transport_header(skb);
- inet_frag_reasm_finish(&fq->q, skb, reasm_data);
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data, true);
skb->dev = dev;
ipv6_hdr(skb)->payload_len = htons(payload_len);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4d2e6b31a8d6..fd059e08785a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1951,7 +1951,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
if (!arg.match)
- return;
+ goto unlock;
fib6_nh = arg.match;
} else {
fib6_nh = from->fib6_nh;
@@ -2563,7 +2563,7 @@ static struct dst_entry *rt6_check(struct rt6_info *rt,
{
u32 rt_cookie = 0;
- if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
+ if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
rt_cookie != cookie)
return NULL;
@@ -6031,9 +6031,6 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
return 0;
}
-static int zero;
-static int one = 1;
-
static struct ctl_table ipv6_route_table_template[] = {
{
.procname = "flush",
@@ -6111,8 +6108,8 @@ static struct ctl_table ipv6_route_table_template[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{ }
};
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 80610899a323..b2ccbc473127 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -900,12 +900,17 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
0, dst, tiph->saddr, 0, 0,
sock_net_uid(tunnel->net, NULL));
- rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
- if (IS_ERR(rt)) {
- dev->stats.tx_carrier_errors++;
- goto tx_error_icmp;
+ rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
+ if (!rt) {
+ rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
+ if (IS_ERR(rt)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
}
+
if (rt->rt_type != RTN_UNICAST) {
ip_rt_put(rt);
dev->stats.tx_carrier_errors++;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index dc4c91e0bfb8..ec8fcfc60a27 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -21,8 +21,6 @@
#include <net/calipso.h>
#endif
-static int zero;
-static int one = 1;
static int flowlabel_reflect_max = 0x7;
static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
@@ -115,7 +113,7 @@ static struct ctl_table ipv6_table_template[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &flowlabel_reflect_max,
},
{
@@ -152,8 +150,8 @@ static struct ctl_table ipv6_table_template[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_rt6_multipath_hash_policy,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "seg6_flowlabel",
@@ -179,7 +177,7 @@ static struct ctl_table ipv6_rotable[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one
+ .extra1 = SYSCTL_ONE
},
#ifdef CONFIG_NETLABEL
{
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d56a9019a0fe..5da069e91cac 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -984,8 +984,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk) {
oif = sk->sk_bound_dev_if;
- if (sk_fullsock(sk))
+ if (sk_fullsock(sk)) {
+ const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
+
trace_tcp_send_reset(sk, skb);
+ if (np->repflow)
+ label = ip6_flowlabel(ipv6h);
+ }
if (sk->sk_state == TCP_TIME_WAIT)
label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
} else {
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 09e1694b6d34..ebb62a4ebe30 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -512,7 +512,9 @@ static void iucv_sock_close(struct sock *sk)
sk->sk_state = IUCV_DISCONN;
sk->sk_state_change(sk);
}
- case IUCV_DISCONN: /* fall through */
+ /* fall through */
+
+ case IUCV_DISCONN:
sk->sk_state = IUCV_CLOSING;
sk->sk_state_change(sk);
@@ -525,8 +527,9 @@ static void iucv_sock_close(struct sock *sk)
iucv_sock_in_state(sk, IUCV_CLOSED, 0),
timeo);
}
+ /* fall through */
- case IUCV_CLOSING: /* fall through */
+ case IUCV_CLOSING:
sk->sk_state = IUCV_CLOSED;
sk->sk_state_change(sk);
@@ -535,8 +538,9 @@ static void iucv_sock_close(struct sock *sk)
skb_queue_purge(&iucv->send_skb_q);
skb_queue_purge(&iucv->backlog_skb_q);
+ /* fall through */
- default: /* fall through */
+ default:
iucv_sever_path(sk, 1);
}
@@ -2247,10 +2251,10 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
kfree_skb(skb);
break;
}
- /* fall through and receive non-zero length data */
+ /* fall through - and receive non-zero length data */
case (AF_IUCV_FLAG_SHT):
/* shutdown request */
- /* fall through and receive zero length data */
+ /* fall through - and receive zero length data */
case 0:
/* plain data frame */
IUCV_SKB_CB(skb)->class = trans_hdr->iucv_hdr.class;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 1d0e5904dedf..c54cb59593ef 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1681,6 +1681,9 @@ static const struct proto_ops pppol2tp_ops = {
.recvmsg = pppol2tp_recvmsg,
.mmap = sock_no_mmap,
.ioctl = pppox_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = pppox_compat_ioctl,
+#endif
};
static const struct pppox_proto pppol2tp_proto = {
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 76cc9e967fa6..4d458067d80d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -936,8 +936,10 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
err = ieee80211_set_probe_resp(sdata, params->probe_resp,
params->probe_resp_len, csa);
- if (err < 0)
+ if (err < 0) {
+ kfree(new);
return err;
+ }
if (err == 0)
changed |= BSS_CHANGED_AP_PROBE_RESP;
@@ -949,8 +951,10 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
params->civicloc,
params->civicloc_len);
- if (err < 0)
+ if (err < 0) {
+ kfree(new);
return err;
+ }
changed |= BSS_CHANGED_FTM_RESPONDER;
}
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index acd4afb4944b..c9a8a2433e8a 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -187,11 +187,16 @@ int drv_conf_tx(struct ieee80211_local *local,
if (!check_sdata_in_driver(sdata))
return -EIO;
- if (WARN_ONCE(params->cw_min == 0 ||
- params->cw_min > params->cw_max,
- "%s: invalid CW_min/CW_max: %d/%d\n",
- sdata->name, params->cw_min, params->cw_max))
+ if (params->cw_min == 0 || params->cw_min > params->cw_max) {
+ /*
+ * If we can't configure hardware anyway, don't warn. We may
+ * never have initialized the CW parameters.
+ */
+ WARN_ONCE(local->ops->conf_tx,
+ "%s: invalid CW_min/CW_max: %d/%d\n",
+ sdata->name, params->cw_min, params->cw_max);
return -EINVAL;
+ }
trace_drv_conf_tx(local, sdata, ac, params);
if (local->ops->conf_tx)
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 06aac0aaae64..8dc6580e1787 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1222,7 +1222,6 @@ static void ieee80211_if_setup(struct net_device *dev)
static void ieee80211_if_setup_no_queue(struct net_device *dev)
{
ieee80211_if_setup(dev);
- dev->features |= NETIF_F_LLTX;
dev->priv_flags |= IFF_NO_QUEUE;
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a99ad0325309..4c888dc9bd81 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2042,6 +2042,16 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local,
ieee80211_regulatory_limit_wmm_params(sdata, &params[ac], ac);
}
+ /* WMM specification requires all 4 ACIs. */
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ if (params[ac].cw_min == 0) {
+ sdata_info(sdata,
+ "AP has invalid WMM params (missing AC %d), using defaults\n",
+ ac);
+ return false;
+ }
+ }
+
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
mlme_dbg(sdata,
"WMM AC=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d, downgraded=%d\n",
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1b224fa27367..ad1e58184c4e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3796,9 +3796,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
}
/* Always allow software iftypes */
- if (local->hw.wiphy->software_iftypes & BIT(iftype) ||
- (iftype == NL80211_IFTYPE_AP_VLAN &&
- local->hw.wiphy->flags & WIPHY_FLAG_4ADDR_AP)) {
+ if (cfg80211_iftype_allowed(local->hw.wiphy, iftype, 0, 1)) {
if (radar_detect)
return -EINVAL;
return 0;
@@ -3833,7 +3831,8 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
if (sdata_iter == sdata ||
!ieee80211_sdata_running(sdata_iter) ||
- local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
+ cfg80211_iftype_allowed(local->hw.wiphy,
+ wdev_iter->iftype, 0, 1))
continue;
params.iftype_num[wdev_iter->iftype]++;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 198ec4fe4148..c312741df2ce 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -37,8 +37,6 @@
#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
-static int zero = 0;
-static int one = 1;
static int label_limit = (1 << 20) - 1;
static int ttl_max = 255;
@@ -2607,7 +2605,7 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
.data = &platform_labels,
.maxlen = sizeof(int),
.mode = table->mode,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &label_limit,
};
@@ -2636,8 +2634,8 @@ static const struct ctl_table mpls_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "default_ttl",
@@ -2645,7 +2643,7 @@ static const struct ctl_table mpls_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &ttl_max,
},
{ }
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 32a45c03786e..0d65f4d39494 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -223,8 +223,6 @@ config NF_CONNTRACK_FTP
of Network Address Translation on them.
This is FTP support on Layer 3 independent connection tracking.
- Layer 3 independent connection tracking is experimental scheme
- which generalize ip_conntrack to support other layer 3 protocols.
To compile it as a module, choose M here. If unsure, say N.
@@ -338,7 +336,7 @@ config NF_CONNTRACK_SIP
help
SIP is an application-layer control protocol that can establish,
modify, and terminate multimedia sessions (conferences) such as
- Internet telephony calls. With the ip_conntrack_sip and
+ Internet telephony calls. With the nf_conntrack_sip and
the nf_nat_sip modules you can support the protocol on a connection
tracking/NATing firewall.
@@ -1313,7 +1311,7 @@ config NETFILTER_XT_MATCH_HELPER
depends on NETFILTER_ADVANCED
help
Helper matching allows you to match packets in dynamic connections
- tracked by a conntrack-helper, ie. ip_conntrack_ftp
+ tracked by a conntrack-helper, ie. nf_conntrack_ftp
To compile it as a module, choose M here. If unsure, say Y.
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index ca7ac4a25ada..1d4e63326e68 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -226,7 +226,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
e.id = ip_to_id(map, ip);
- if (opt->flags & IPSET_DIM_ONE_SRC)
+ if (opt->flags & IPSET_DIM_TWO_SRC)
ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
else
ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 2e151856ad99..e64d5f9a89dd 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1161,7 +1161,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
return -ENOENT;
write_lock_bh(&ip_set_ref_lock);
- if (set->ref != 0) {
+ if (set->ref != 0 || set->ref_netlink != 0) {
ret = -IPSET_ERR_REFERENCED;
goto out;
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
index faf59b6a998f..24d8f4df4230 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmac.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -89,15 +89,11 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- /* MAC can be src only */
- if (!(opt->flags & IPSET_DIM_TWO_SRC))
- return 0;
-
if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
- if (opt->flags & IPSET_DIM_ONE_SRC)
+ if (opt->flags & IPSET_DIM_TWO_SRC)
ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
else
ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 07e0967bf129..060565e7d227 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1726,7 +1726,6 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
#ifdef CONFIG_SYSCTL
-static int zero;
static int three = 3;
static int
@@ -1935,7 +1934,7 @@ static struct ctl_table vs_vars[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &three,
},
{
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 403541996952..08adcb222986 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -231,7 +231,7 @@ void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
__func__, ct, ARG_TUPLE(&exp->tuple));
- nf_ct_expect_related(exp);
+ nf_ct_expect_related(exp, 0);
nf_ct_expect_put(exp);
}
EXPORT_SYMBOL(ip_vs_nfct_expect_related);
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 42ee659d0d1e..d011d2eb0848 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -159,7 +159,7 @@ static int amanda_help(struct sk_buff *skb,
if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
ret = nf_nat_amanda(skb, ctinfo, protoff,
off - dataoff, len, exp);
- else if (nf_ct_expect_related(exp) != 0) {
+ else if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
}
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 921a7b95be68..1ba6becc3079 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -68,7 +68,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
exp->helper = NULL;
- nf_ct_expect_related(exp);
+ nf_ct_expect_related(exp, 0);
nf_ct_expect_put(exp);
nf_ct_refresh(ct, skb, timeout * HZ);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index bdfeacee0817..81a8ef42b88d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -453,13 +453,12 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
* table location, we assume id gets exposed to userspace.
*
* Following nf_conn items do not change throughout lifetime
- * of the nf_conn after it has been committed to main hash table:
+ * of the nf_conn:
*
* 1. nf_conn address
- * 2. nf_conn->ext address
- * 3. nf_conn->master address (normally NULL)
- * 4. tuple
- * 5. the associated net namespace
+ * 2. nf_conn->master address (normally NULL)
+ * 3. the associated net namespace
+ * 4. the original direction tuple
*/
u32 nf_ct_get_id(const struct nf_conn *ct)
{
@@ -469,9 +468,10 @@ u32 nf_ct_get_id(const struct nf_conn *ct)
net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
a = (unsigned long)ct;
- b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct));
- c = (unsigned long)ct->ext;
- d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash),
+ b = (unsigned long)ct->master;
+ c = (unsigned long)nf_ct_net(ct);
+ d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple),
&ct_id_seed);
#ifdef CONFIG_64BIT
return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
@@ -1817,9 +1817,7 @@ EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/mutex.h>
-/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
- * in ip_conntrack_core, since we don't want the protocols to autoload
- * or depend on ctnetlink */
+/* Generic function for tcp/udp/sctp/dccp and alike. */
int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index ffd1f4906c4f..65364de915d1 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -249,13 +249,22 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
static inline int expect_matches(const struct nf_conntrack_expect *a,
const struct nf_conntrack_expect *b)
{
- return a->master == b->master &&
- nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
+ return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
+static bool master_matches(const struct nf_conntrack_expect *a,
+ const struct nf_conntrack_expect *b,
+ unsigned int flags)
+{
+ if (flags & NF_CT_EXP_F_SKIP_MASTER)
+ return true;
+
+ return a->master == b->master;
+}
+
/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
@@ -399,7 +408,8 @@ static void evict_oldest_expect(struct nf_conn *master,
nf_ct_remove_expect(last);
}
-static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
+static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
+ unsigned int flags)
{
const struct nf_conntrack_expect_policy *p;
struct nf_conntrack_expect *i;
@@ -417,8 +427,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
}
h = nf_ct_expect_dst_hash(net, &expect->tuple);
hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
- if (expect_matches(i, expect)) {
- if (i->class != expect->class)
+ if (master_matches(i, expect, flags) &&
+ expect_matches(i, expect)) {
+ if (i->class != expect->class ||
+ i->master != expect->master)
return -EALREADY;
if (nf_ct_remove_expect(i))
@@ -453,12 +465,12 @@ out:
}
int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
- u32 portid, int report)
+ u32 portid, int report, unsigned int flags)
{
int ret;
spin_lock_bh(&nf_conntrack_expect_lock);
- ret = __nf_ct_expect_check(expect);
+ ret = __nf_ct_expect_check(expect, flags);
if (ret < 0)
goto out;
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 8c6c11bab5b6..0ecb3e289ef2 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -525,7 +525,7 @@ skip_nl_seq:
protoff, matchoff, matchlen, exp);
else {
/* Can't expect this? Best to drop packet now. */
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
} else
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 8f6ba8162f0b..573cb4481481 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -1,11 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323
- * conntrack/NAT module.
+ * BER and PER decoding library for H.323 conntrack/NAT module.
*
* Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
*
- * See ip_conntrack_helper_h323_asn1.h for details.
+ * See nf_conntrack_helper_h323_asn1.h for details.
*/
#ifdef __KERNEL__
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 6497e5fc0871..8ba037b76ad3 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -305,8 +305,8 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
taddr, port, rtp_port, rtp_exp, rtcp_exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(rtp_exp) == 0) {
- if (nf_ct_expect_related(rtcp_exp) == 0) {
+ if (nf_ct_expect_related(rtp_exp, 0) == 0) {
+ if (nf_ct_expect_related(rtcp_exp, 0) == 0) {
pr_debug("nf_ct_h323: expect RTP ");
nf_ct_dump_tuple(&rtp_exp->tuple);
pr_debug("nf_ct_h323: expect RTCP ");
@@ -364,7 +364,7 @@ static int expect_t120(struct sk_buff *skb,
ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_h323: expect T.120 ");
nf_ct_dump_tuple(&exp->tuple);
} else
@@ -701,7 +701,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_q931: expect H.245 ");
nf_ct_dump_tuple(&exp->tuple);
} else
@@ -825,7 +825,7 @@ static int expect_callforwarding(struct sk_buff *skb,
protoff, data, dataoff,
taddr, port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_q931: expect Call Forwarding ");
nf_ct_dump_tuple(&exp->tuple);
} else
@@ -1284,7 +1284,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
ret = nat_q931(skb, ct, ctinfo, protoff, data,
taddr, i, port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
nf_ct_dump_tuple(&exp->tuple);
@@ -1349,7 +1349,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
IPPROTO_UDP, NULL, &port);
exp->helper = nf_conntrack_helper_ras;
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect RAS ");
nf_ct_dump_tuple(&exp->tuple);
} else
@@ -1561,7 +1561,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->helper = nf_conntrack_helper_q931;
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
nf_ct_dump_tuple(&exp->tuple);
} else
@@ -1615,7 +1615,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->helper = nf_conntrack_helper_q931;
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
nf_ct_dump_tuple(&exp->tuple);
} else
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 7ac156f1f3bc..e40988a2f22f 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -213,7 +213,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
addr_beg_p - ib_ptr,
addr_end_p - addr_beg_p,
exp);
- else if (nf_ct_expect_related(exp) != 0) {
+ else if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct,
"cannot add expectation");
ret = NF_DROP;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1b77444d5b52..6aa01eb6fe99 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2616,7 +2616,7 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
if (IS_ERR(exp))
return PTR_ERR(exp);
- err = nf_ct_expect_related_report(exp, portid, report);
+ err = nf_ct_expect_related_report(exp, portid, report, 0);
nf_ct_expect_put(exp);
return err;
}
@@ -3367,7 +3367,7 @@ ctnetlink_create_expect(struct net *net,
goto err_rcu;
}
- err = nf_ct_expect_related_report(exp, portid, report);
+ err = nf_ct_expect_related_report(exp, portid, report, 0);
nf_ct_expect_put(exp);
err_rcu:
rcu_read_unlock();
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index b22042ad0fca..a971183f11af 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -234,9 +234,9 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre);
if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK)
nf_nat_pptp_exp_gre(exp_orig, exp_reply);
- if (nf_ct_expect_related(exp_orig) != 0)
+ if (nf_ct_expect_related(exp_orig, 0) != 0)
goto out_put_both;
- if (nf_ct_expect_related(exp_reply) != 0)
+ if (nf_ct_expect_related(exp_reply, 0) != 0)
goto out_unexpect_orig;
/* Add GRE keymap entries */
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index c2eb365f1723..5b05487a60d2 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * ip_conntrack_proto_gre.c - Version 3.0
- *
* Connection tracking protocol helper module for GRE.
*
* GRE is a generic encapsulation protocol, which is generally not very
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index dd53e2b20f6b..097deba7441a 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -215,7 +215,7 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
return -NF_ACCEPT;
}
- /* See ip_conntrack_proto_tcp.c */
+ /* See nf_conntrack_proto_tcp.c */
if (state->net->ct.sysctl_checksum &&
state->hook == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, state->hook, dataoff, IPPROTO_ICMP)) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index d5fdfa00d683..85c1f8c213b0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -472,6 +472,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin;
+ u16 win_raw;
s32 receiver_offset;
bool res, in_recv_win;
@@ -480,7 +481,8 @@ static bool tcp_in_window(const struct nf_conn *ct,
*/
seq = ntohl(tcph->seq);
ack = sack = ntohl(tcph->ack_seq);
- win = ntohs(tcph->window);
+ win_raw = ntohs(tcph->window);
+ win = win_raw;
end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
@@ -655,14 +657,14 @@ static bool tcp_in_window(const struct nf_conn *ct,
&& state->last_seq == seq
&& state->last_ack == ack
&& state->last_end == end
- && state->last_win == win)
+ && state->last_win == win_raw)
state->retrans++;
else {
state->last_dir = dir;
state->last_seq = seq;
state->last_ack = ack;
state->last_end = end;
- state->last_win = win;
+ state->last_win = win_raw;
state->retrans = 0;
}
}
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 81448c3db661..1aebd6569d4e 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -153,7 +153,7 @@ static int help(struct sk_buff *skb,
nf_ct_dump_tuple(&exp->tuple);
/* Can't expect this? Best to drop packet now. */
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 107251731809..b83dc9bf0a5d 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -977,11 +977,15 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
/* -EALREADY handling works around end-points that send
* SDP messages with identical port but different media type,
* we pretend expectation was set up.
+ * It also works in the case that SDP messages are sent with
+ * identical expect tuples but for different master conntracks.
*/
- int errp = nf_ct_expect_related(rtp_exp);
+ int errp = nf_ct_expect_related(rtp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (errp == 0 || errp == -EALREADY) {
- int errcp = nf_ct_expect_related(rtcp_exp);
+ int errcp = nf_ct_expect_related(rtcp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (errcp == 0 || errcp == -EALREADY)
ret = NF_ACCEPT;
@@ -1296,7 +1300,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
ret = hooks->expect(skb, protoff, dataoff, dptr, datalen,
exp, matchoff, matchlen);
else {
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
} else
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index df6d6d61bd58..80ee53f29f68 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -78,7 +78,7 @@ static int tftp_help(struct sk_buff *skb,
nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
ret = nf_nat_tftp(skb, ctinfo, exp);
- else if (nf_ct_expect_related(exp) != 0) {
+ else if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
}
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index e3d797252a98..80a8f9ae4c93 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -111,15 +111,16 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
+{
+ return (__s32)(timeout - (u32)jiffies);
+}
+
+static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
{
const struct nf_conntrack_l4proto *l4proto;
+ int l4num = nf_ct_protonum(ct);
unsigned int timeout;
- int l4num;
-
- l4num = nf_ct_protonum(ct);
- if (l4num == IPPROTO_TCP)
- flow_offload_fixup_tcp(&ct->proto.tcp);
l4proto = nf_ct_l4proto_find(l4num);
if (!l4proto)
@@ -132,7 +133,20 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
else
return;
- ct->timeout = nfct_time_stamp + timeout;
+ if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
+ ct->timeout = nfct_time_stamp + timeout;
+}
+
+static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+{
+ if (nf_ct_protonum(ct) == IPPROTO_TCP)
+ flow_offload_fixup_tcp(&ct->proto.tcp);
+}
+
+static void flow_offload_fixup_ct(struct nf_conn *ct)
+{
+ flow_offload_fixup_ct_state(ct);
+ flow_offload_fixup_ct_timeout(ct);
}
void flow_offload_free(struct flow_offload *flow)
@@ -208,6 +222,11 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
}
EXPORT_SYMBOL_GPL(flow_offload_add);
+static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+{
+ return nf_flow_timeout_delta(flow->timeout) <= 0;
+}
+
static void flow_offload_del(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
@@ -223,6 +242,11 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
e = container_of(flow, struct flow_offload_entry, flow);
clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
+ if (nf_flow_has_expired(flow))
+ flow_offload_fixup_ct(e->ct);
+ else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
+ flow_offload_fixup_ct_timeout(e->ct);
+
flow_offload_free(flow);
}
@@ -298,11 +322,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
return err;
}
-static inline bool nf_flow_has_expired(const struct flow_offload *flow)
-{
- return (__s32)(flow->timeout - (u32)jiffies) <= 0;
-}
-
static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
{
struct nf_flowtable *flow_table = data;
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index cdfc33517e85..d68c801dd614 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -214,6 +214,25 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
return true;
}
+static int nf_flow_offload_dst_check(struct dst_entry *dst)
+{
+ if (unlikely(dst_xfrm(dst)))
+ return dst_check(dst, 0) ? 0 : -1;
+
+ return 0;
+}
+
+static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ struct dst_entry *dst)
+{
+ skb_orphan(skb);
+ skb_dst_set_noref(skb, dst);
+ skb->tstamp = 0;
+ dst_output(state->net, state->sk, skb);
+ return NF_STOLEN;
+}
+
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -254,6 +273,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
return NF_ACCEPT;
+ if (nf_flow_offload_dst_check(&rt->dst)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
+
if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
return NF_DROP;
@@ -261,6 +285,13 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
iph = ip_hdr(skb);
ip_decrease_ttl(iph);
+ if (unlikely(dst_xfrm(&rt->dst))) {
+ memset(skb->cb, 0, sizeof(struct inet_skb_parm));
+ IPCB(skb)->iif = skb->dev->ifindex;
+ IPCB(skb)->flags = IPSKB_FORWARDED;
+ return nf_flow_xmit_xfrm(skb, state, &rt->dst);
+ }
+
skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
skb_dst_set_noref(skb, &rt->dst);
@@ -467,6 +498,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
sizeof(*ip6h)))
return NF_ACCEPT;
+ if (nf_flow_offload_dst_check(&rt->dst)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
+
if (skb_try_make_writable(skb, sizeof(*ip6h)))
return NF_DROP;
@@ -477,6 +513,13 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
ip6h = ipv6_hdr(skb);
ip6h->hop_limit--;
+ if (unlikely(dst_xfrm(&rt->dst))) {
+ memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+ IP6CB(skb)->iif = skb->dev->ifindex;
+ IP6CB(skb)->flags = IP6SKB_FORWARDED;
+ return nf_flow_xmit_xfrm(skb, state, &rt->dst);
+ }
+
skb->dev = outdev;
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
skb_dst_set_noref(skb, &rt->dst);
diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
index a352604d6186..3bc7e0854efe 100644
--- a/net/netfilter/nf_nat_amanda.c
+++ b/net/netfilter/nf_nat_amanda.c
@@ -48,7 +48,7 @@ static unsigned int help(struct sk_buff *skb,
int res;
exp->tuple.dst.u.tcp.port = htons(port);
- res = nf_ct_expect_related(exp);
+ res = nf_ct_expect_related(exp, 0);
if (res == 0)
break;
else if (res != -EBUSY) {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 9ab410455992..3f6023ed4966 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -519,7 +519,7 @@ another_round:
* and NF_INET_LOCAL_OUT, we change the destination to map into the
* range. It might not be possible to get a unique tuple, but we try.
* At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
+ * __nf_conntrack_confirm and drop the packet. */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig_tuple,
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index d48484a9d52d..aace6768a64e 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -91,7 +91,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
int ret;
exp->tuple.dst.u.tcp.port = htons(port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index dfb7ef8845bd..c691ab8d234c 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -53,7 +53,7 @@ static unsigned int help(struct sk_buff *skb,
int ret;
exp->tuple.dst.u.tcp.port = htons(port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index e338d91980d8..f0a735e86851 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -414,7 +414,7 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
int ret;
exp->tuple.dst.u.udp.port = htons(port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, NF_CT_EXP_F_SKIP_MASTER);
if (ret == 0)
break;
else if (ret != -EBUSY) {
@@ -607,7 +607,8 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
int ret;
rtp_exp->tuple.dst.u.udp.port = htons(port);
- ret = nf_ct_expect_related(rtp_exp);
+ ret = nf_ct_expect_related(rtp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (ret == -EBUSY)
continue;
else if (ret < 0) {
@@ -615,7 +616,8 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
break;
}
rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
- ret = nf_ct_expect_related(rtcp_exp);
+ ret = nf_ct_expect_related(rtcp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (ret == 0)
break;
else if (ret == -EBUSY) {
diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c
index 833a11f68031..1a591132d6eb 100644
--- a/net/netfilter/nf_nat_tftp.c
+++ b/net/netfilter/nf_nat_tftp.c
@@ -30,7 +30,7 @@ static unsigned int help(struct sk_buff *skb,
= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
exp->dir = IP_CT_DIR_REPLY;
exp->expectfn = nf_nat_follow_master;
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, exp->master, "cannot add expectation");
return NF_DROP;
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index b101f187eda8..c769462a839e 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -470,7 +470,7 @@ synproxy_send_client_synack(struct net *net,
struct iphdr *iph, *niph;
struct tcphdr *nth;
unsigned int tcp_hdr_size;
- u16 mss = opts->mss;
+ u16 mss = opts->mss_encode;
iph = ip_hdr(skb);
@@ -687,7 +687,7 @@ ipv4_synproxy_hook(void *priv, struct sk_buff *skb,
state = &ct->proto.tcp;
switch (state->state) {
case TCP_CONNTRACK_CLOSE:
- if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
ntohl(th->seq) + 1);
break;
@@ -884,7 +884,7 @@ synproxy_send_client_synack_ipv6(struct net *net,
struct ipv6hdr *iph, *niph;
struct tcphdr *nth;
unsigned int tcp_hdr_size;
- u16 mss = opts->mss;
+ u16 mss = opts->mss_encode;
iph = ipv6_hdr(skb);
@@ -1111,7 +1111,7 @@ ipv6_synproxy_hook(void *priv, struct sk_buff *skb,
state = &ct->proto.tcp;
switch (state->state) {
case TCP_CONNTRACK_CLOSE:
- if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
ntohl(th->seq) + 1);
break;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ed17a7c29b86..d47469f824a1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -138,9 +138,14 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
return;
list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
- if (trans->msg_type == NFT_MSG_NEWSET &&
- nft_trans_set(trans) == set) {
- set->bound = true;
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (nft_trans_set(trans) == set)
+ nft_trans_set_bound(trans) = true;
+ break;
+ case NFT_MSG_NEWSETELEM:
+ if (nft_trans_elem_set(trans) == set)
+ nft_trans_elem_set_bound(trans) = true;
break;
}
}
@@ -1662,7 +1667,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
chain->flags |= NFT_BASE_CHAIN | flags;
basechain->policy = NF_ACCEPT;
- INIT_LIST_HEAD(&basechain->cb_list);
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+ nft_chain_offload_priority(basechain) < 0)
+ return -EOPNOTSUPP;
+
+ flow_block_init(&basechain->flow_block);
} else {
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
@@ -1900,6 +1909,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (nla[NFTA_CHAIN_FLAGS])
flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS]));
+ else if (chain)
+ flags = chain->flags;
nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
@@ -6904,7 +6915,7 @@ static int __nf_tables_abort(struct net *net)
break;
case NFT_MSG_NEWSET:
trans->ctx.table->use--;
- if (nft_trans_set(trans)->bound) {
+ if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
break;
}
@@ -6916,7 +6927,7 @@ static int __nf_tables_abort(struct net *net)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
- if (nft_trans_elem_set(trans)->bound) {
+ if (nft_trans_elem_set_bound(trans)) {
nft_trans_destroy(trans);
break;
}
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 2c3302845f67..c0d18c1d77ac 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -103,10 +103,11 @@ void nft_offload_update_dependency(struct nft_offload_ctx *ctx,
}
static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
- __be16 proto,
- struct netlink_ext_ack *extack)
+ __be16 proto, int priority,
+ struct netlink_ext_ack *extack)
{
common->protocol = proto;
+ common->prio = priority;
common->extack = extack;
}
@@ -116,7 +117,7 @@ static int nft_setup_cb_call(struct nft_base_chain *basechain,
struct flow_block_cb *block_cb;
int err;
- list_for_each_entry(block_cb, &basechain->cb_list, list) {
+ list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err < 0)
return err;
@@ -124,6 +125,15 @@ static int nft_setup_cb_call(struct nft_base_chain *basechain,
return 0;
}
+int nft_chain_offload_priority(struct nft_base_chain *basechain)
+{
+ if (basechain->ops.priority <= 0 ||
+ basechain->ops.priority > USHRT_MAX)
+ return -1;
+
+ return 0;
+}
+
static int nft_flow_offload_rule(struct nft_trans *trans,
enum flow_cls_command command)
{
@@ -142,7 +152,8 @@ static int nft_flow_offload_rule(struct nft_trans *trans,
if (flow)
proto = flow->proto;
- nft_flow_offload_common_init(&cls_flow.common, proto, &extack);
+ nft_flow_offload_common_init(&cls_flow.common, proto,
+ basechain->ops.priority, &extack);
cls_flow.command = command;
cls_flow.cookie = (unsigned long) rule;
if (flow)
@@ -154,7 +165,7 @@ static int nft_flow_offload_rule(struct nft_trans *trans,
static int nft_flow_offload_bind(struct flow_block_offload *bo,
struct nft_base_chain *basechain)
{
- list_splice(&bo->cb_list, &basechain->cb_list);
+ list_splice(&bo->cb_list, &basechain->flow_block.cb_list);
return 0;
}
@@ -198,6 +209,7 @@ static int nft_flow_offload_chain(struct nft_trans *trans,
return -EOPNOTSUPP;
bo.command = cmd;
+ bo.block = &basechain->flow_block;
bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo.extack = &extack;
INIT_LIST_HEAD(&bo.cb_list);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 92077d459109..4abbb452cf6c 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -578,7 +578,7 @@ static int nfnetlink_bind(struct net *net, int group)
ss = nfnetlink_get_subsys(type << 8);
rcu_read_unlock();
if (!ss)
- request_module("nfnetlink-subsys-%d", type);
+ request_module_nowait("nfnetlink-subsys-%d", type);
return 0;
}
#endif
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 3fd540b2c6ba..b5d5d071d765 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -193,7 +193,7 @@ static inline void nft_chain_filter_inet_init(void) {}
static inline void nft_chain_filter_inet_fini(void) {}
#endif /* CONFIG_NF_TABLES_IPV6 */
-#ifdef CONFIG_NF_TABLES_BRIDGE
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE)
static unsigned int
nft_do_chain_bridge(void *priv,
struct sk_buff *skb,
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
index 2f89bde3c61c..ff9ac8ae0031 100644
--- a/net/netfilter/nft_chain_nat.c
+++ b/net/netfilter/nft_chain_nat.c
@@ -142,3 +142,6 @@ MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
#ifdef CONFIG_NF_TABLES_IPV6
MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
#endif
+#ifdef CONFIG_NF_TABLES_INET
+MODULE_ALIAS_NFT_CHAIN(1, "nat"); /* NFPROTO_INET */
+#endif
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 827ab6196df9..46ca8bcca1bd 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1252,7 +1252,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
priv->l4proto, NULL, &priv->dport);
exp->timeout.expires = jiffies + priv->timeout * HZ;
- if (nf_ct_expect_related(exp) != 0)
+ if (nf_ct_expect_related(exp, 0) != 0)
regs->verdict.code = NF_DROP;
}
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index aa5f571d4361..060a4ed46d5e 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -72,11 +72,11 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
{
struct nft_flow_offload *priv = nft_expr_priv(expr);
struct nf_flowtable *flowtable = &priv->flowtable->data;
+ struct tcphdr _tcph, *tcph = NULL;
enum ip_conntrack_info ctinfo;
struct nf_flow_route route;
struct flow_offload *flow;
enum ip_conntrack_dir dir;
- bool is_tcp = false;
struct nf_conn *ct;
int ret;
@@ -89,7 +89,10 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
case IPPROTO_TCP:
- is_tcp = true;
+ tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff,
+ sizeof(_tcph), &_tcph);
+ if (unlikely(!tcph || tcph->fin || tcph->rst))
+ goto out;
break;
case IPPROTO_UDP:
break;
@@ -115,7 +118,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (!flow)
goto err_flow_alloc;
- if (is_tcp) {
+ if (tcph) {
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index fe93e731dc7f..b836d550b919 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -129,7 +129,7 @@ static int nft_symhash_init(const struct nft_ctx *ctx,
priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
- if (priv->modulus <= 1)
+ if (priv->modulus < 1)
return -ERANGE;
if (priv->offset + priv->modulus - 1 < priv->offset)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 76866f77e343..f69afb9ff3cb 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -60,24 +60,16 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*dest = skb->mark;
break;
case NFT_META_IIF:
- if (in == NULL)
- goto err;
- *dest = in->ifindex;
+ *dest = in ? in->ifindex : 0;
break;
case NFT_META_OIF:
- if (out == NULL)
- goto err;
- *dest = out->ifindex;
+ *dest = out ? out->ifindex : 0;
break;
case NFT_META_IIFNAME:
- if (in == NULL)
- goto err;
- strncpy((char *)dest, in->name, IFNAMSIZ);
+ strncpy((char *)dest, in ? in->name : "", IFNAMSIZ);
break;
case NFT_META_OIFNAME:
- if (out == NULL)
- goto err;
- strncpy((char *)dest, out->name, IFNAMSIZ);
+ strncpy((char *)dest, out ? out->name : "", IFNAMSIZ);
break;
case NFT_META_IIFTYPE:
if (in == NULL)
@@ -546,7 +538,7 @@ nft_meta_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
return ERR_PTR(-EINVAL);
-#ifdef CONFIG_NF_TABLES_BRIDGE
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) && IS_MODULE(CONFIG_NFT_BRIDGE_META)
if (ctx->family == NFPROTO_BRIDGE)
return ERR_PTR(-EAGAIN);
#endif
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 8487eeff5c0e..43eeb1f609f1 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -291,4 +291,4 @@ module_exit(nft_redir_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_EXPR("nat");
+MODULE_ALIAS_NFT_EXPR("redir");
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 80060ade8a5b..928e661d1517 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -31,6 +31,8 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts,
opts->options |= NF_SYNPROXY_OPT_ECN;
opts->options &= priv->info.options;
+ opts->mss_encode = opts->mss;
+ opts->mss = info->mss;
if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, opts);
else
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 96740d389377..c4f54ad2b98a 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -967,6 +967,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
window = skb->data[20];
+ sock_hold(make);
skb->sk = make;
skb->destructor = sock_efree;
make->sk_state = TCP_ESTABLISHED;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 33b388103741..d01410e52097 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1047,7 +1047,7 @@ error:
}
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
-static struct sw_flow_actions *get_flow_actions(struct net *net,
+static noinline_for_stack struct sw_flow_actions *get_flow_actions(struct net *net,
const struct nlattr *a,
const struct sw_flow_key *key,
const struct sw_flow_mask *mask,
@@ -1081,12 +1081,13 @@ static struct sw_flow_actions *get_flow_actions(struct net *net,
* we should not to return match object with dangling reference
* to mask.
* */
-static int ovs_nla_init_match_and_action(struct net *net,
- struct sw_flow_match *match,
- struct sw_flow_key *key,
- struct nlattr **a,
- struct sw_flow_actions **acts,
- bool log)
+static noinline_for_stack int
+ovs_nla_init_match_and_action(struct net *net,
+ struct sw_flow_match *match,
+ struct sw_flow_key *key,
+ struct nlattr **a,
+ struct sw_flow_actions **acts,
+ bool log)
{
struct sw_flow_mask mask;
int error = 0;
@@ -1958,10 +1959,9 @@ static struct vport *lookup_vport(struct net *net,
}
-/* Called with ovs_mutex */
-static void update_headroom(struct datapath *dp)
+static unsigned int ovs_get_max_headroom(struct datapath *dp)
{
- unsigned dev_headroom, max_headroom = 0;
+ unsigned int dev_headroom, max_headroom = 0;
struct net_device *dev;
struct vport *vport;
int i;
@@ -1975,10 +1975,19 @@ static void update_headroom(struct datapath *dp)
}
}
- dp->max_headroom = max_headroom;
+ return max_headroom;
+}
+
+/* Called with ovs_mutex */
+static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
+{
+ struct vport *vport;
+ int i;
+
+ dp->max_headroom = new_headroom;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
- netdev_set_rx_headroom(vport->dev, max_headroom);
+ netdev_set_rx_headroom(vport->dev, new_headroom);
}
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -1989,6 +1998,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *reply;
struct vport *vport;
struct datapath *dp;
+ unsigned int new_headroom;
u32 port_no;
int err;
@@ -2050,8 +2060,10 @@ restart:
info->snd_portid, info->snd_seq, 0,
OVS_VPORT_CMD_NEW);
- if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
- update_headroom(dp);
+ new_headroom = netdev_get_fwd_headroom(vport->dev);
+
+ if (new_headroom > dp->max_headroom)
+ ovs_update_headroom(dp, new_headroom);
else
netdev_set_rx_headroom(vport->dev, dp->max_headroom);
@@ -2122,11 +2134,12 @@ exit_unlock_free:
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
- bool must_update_headroom = false;
+ bool update_headroom = false;
struct nlattr **a = info->attrs;
struct sk_buff *reply;
struct datapath *dp;
struct vport *vport;
+ unsigned int new_headroom;
int err;
reply = ovs_vport_cmd_alloc_info();
@@ -2152,12 +2165,17 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
/* the vport deletion may trigger dp headroom update */
dp = vport->dp;
if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
- must_update_headroom = true;
+ update_headroom = true;
+
netdev_reset_rx_headroom(vport->dev);
ovs_dp_detach_port(vport);
- if (must_update_headroom)
- update_headroom(dp);
+ if (update_headroom) {
+ new_headroom = ovs_get_max_headroom(dp);
+
+ if (new_headroom < dp->max_headroom)
+ ovs_update_headroom(dp, new_headroom);
+ }
ovs_unlock();
ovs_notify(&dp_vport_genl_family, reply, info);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dca3b1e2acf0..bc89e16e0505 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -59,7 +59,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
const struct sk_buff *skb)
{
- struct flow_stats *stats;
+ struct sw_flow_stats *stats;
unsigned int cpu = smp_processor_id();
int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@@ -87,7 +87,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
if (likely(flow->stats_last_writer != -1) &&
likely(!rcu_access_pointer(flow->stats[cpu]))) {
/* Try to allocate CPU-specific stats. */
- struct flow_stats *new_stats;
+ struct sw_flow_stats *new_stats;
new_stats =
kmem_cache_alloc_node(flow_stats_cache,
@@ -134,7 +134,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
/* We open code this to make sure cpu 0 is always considered */
for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
- struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
+ struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
if (stats) {
/* Local CPU may write on non-local stats, so we must
@@ -158,7 +158,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
/* We open code this to make sure cpu 0 is always considered */
for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
- struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
+ struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
if (stats) {
spin_lock_bh(&stats->lock);
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 3e2cc2202d66..a5506e2d4b7a 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -194,7 +194,7 @@ struct sw_flow_actions {
struct nlattr actions[];
};
-struct flow_stats {
+struct sw_flow_stats {
u64 packet_count; /* Number of packets matched. */
u64 byte_count; /* Number of bytes matched. */
unsigned long used; /* Last used time (in jiffies). */
@@ -216,7 +216,7 @@ struct sw_flow {
struct cpumask cpu_used_mask;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
- struct flow_stats __rcu *stats[]; /* One for each CPU. First one
+ struct sw_flow_stats __rcu *stats[]; /* One for each CPU. First one
* is allocated at flow creation time,
* the rest are allocated on demand
* while holding the 'stats[0].lock'.
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 988fd8a94e43..cf3582c5ed70 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -66,7 +66,7 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
struct sw_flow *ovs_flow_alloc(void)
{
struct sw_flow *flow;
- struct flow_stats *stats;
+ struct sw_flow_stats *stats;
flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
if (!flow)
@@ -110,7 +110,7 @@ static void flow_free(struct sw_flow *flow)
for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
- (struct flow_stats __force *)flow->stats[cpu]);
+ (struct sw_flow_stats __force *)flow->stats[cpu]);
kmem_cache_free(flow_cache, flow);
}
@@ -712,13 +712,13 @@ int ovs_flow_init(void)
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+ (nr_cpu_ids
- * sizeof(struct flow_stats *)),
+ * sizeof(struct sw_flow_stats *)),
0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
flow_stats_cache
- = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+ = kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats),
0, SLAB_HWCACHE_ALIGN, NULL);
if (flow_stats_cache == NULL) {
kmem_cache_destroy(flow_cache);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8d54f3047768..e2742b006d25 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2618,6 +2618,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
mutex_lock(&po->pg_vec_lock);
+ /* packet_sendmsg() check on tx_ring.pg_vec was lockless,
+ * we need to confirm it under protection of pg_vec_lock.
+ */
+ if (unlikely(!po->tx_ring.pg_vec)) {
+ err = -EBUSY;
+ goto out;
+ }
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
proto = po->num;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7ea134f9a825..ed7f2133acc2 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -736,6 +736,7 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
cinfo->next_rx_seq = cp->cp_next_rx_seq;
cinfo->laddr = conn->c_laddr.s6_addr32[3];
cinfo->faddr = conn->c_faddr.s6_addr32[3];
+ cinfo->tos = conn->c_tos;
strncpy(cinfo->transport, conn->c_trans->t_name,
sizeof(cinfo->transport));
cinfo->flags = 0;
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 67a715b076ca..303c6ee8bdb7 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -15,8 +15,7 @@
#define RDS_IB_DEFAULT_RECV_WR 1024
#define RDS_IB_DEFAULT_SEND_WR 256
-#define RDS_IB_DEFAULT_FR_WR 256
-#define RDS_IB_DEFAULT_FR_INV_WR 256
+#define RDS_IB_DEFAULT_FR_WR 512
#define RDS_IB_DEFAULT_RETRY_COUNT 1
@@ -157,7 +156,7 @@ struct rds_ib_connection {
/* To control the number of wrs from fastreg */
atomic_t i_fastreg_wrs;
- atomic_t i_fastunreg_wrs;
+ atomic_t i_fastreg_inuse_count;
/* interrupt handling */
struct tasklet_struct i_send_tasklet;
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 66c6eb56072b..fddaa09f7b0d 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -40,6 +40,7 @@
#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"
+#include "ib_mr.h"
/*
* Set the selected protocol version
@@ -460,10 +461,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
* completion queue and send queue. This extra space is used for FRMR
* registration and invalidation work requests
*/
- fr_queue_space = rds_ibdev->use_fastreg ?
- (RDS_IB_DEFAULT_FR_WR + 1) +
- (RDS_IB_DEFAULT_FR_INV_WR + 1)
- : 0;
+ fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0);
/* add the conn now so that connection establishment has the dev */
rds_ib_add_conn(rds_ibdev, conn);
@@ -529,8 +527,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
attr.qp_type = IB_QPT_RC;
attr.send_cq = ic->i_send_cq;
attr.recv_cq = ic->i_recv_cq;
- atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
- atomic_set(&ic->i_fastunreg_wrs, RDS_IB_DEFAULT_FR_INV_WR);
/*
* XXX this can fail if max_*_wr is too large? Are we supposed
@@ -611,11 +607,11 @@ send_hdrs_dma_out:
qp_out:
rdma_destroy_qp(ic->i_cm_id);
recv_cq_out:
- if (!ib_destroy_cq(ic->i_recv_cq))
- ic->i_recv_cq = NULL;
+ ib_destroy_cq(ic->i_recv_cq);
+ ic->i_recv_cq = NULL;
send_cq_out:
- if (!ib_destroy_cq(ic->i_send_cq))
- ic->i_send_cq = NULL;
+ ib_destroy_cq(ic->i_send_cq);
+ ic->i_send_cq = NULL;
rds_ibdev_out:
rds_ib_remove_conn(rds_ibdev, conn);
out:
@@ -997,6 +993,11 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_cm_id, err);
}
+ /* kick off "flush_worker" for all pools in order to reap
+ * all FRMR registrations that are still marked "FRMR_IS_INUSE"
+ */
+ rds_ib_flush_mrs();
+
/*
* We want to wait for tx and rx completion to finish
* before we tear down the connection, but we have to be
@@ -1009,8 +1010,8 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
wait_event(rds_ib_ring_empty_wait,
rds_ib_ring_empty(&ic->i_recv_ring) &&
(atomic_read(&ic->i_signaled_sends) == 0) &&
- (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR) &&
- (atomic_read(&ic->i_fastunreg_wrs) == RDS_IB_DEFAULT_FR_INV_WR));
+ (atomic_read(&ic->i_fastreg_inuse_count) == 0) &&
+ (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
tasklet_kill(&ic->i_send_tasklet);
tasklet_kill(&ic->i_recv_tasklet);
@@ -1137,6 +1138,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
spin_lock_init(&ic->i_ack_lock);
#endif
atomic_set(&ic->i_signaled_sends, 0);
+ atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
/*
* rds_ib_conn_shutdown() waits for these to be emptied so they
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 688dcd68d4ea..06ecf9d2d4bf 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -32,6 +32,24 @@
#include "ib_mr.h"
+static inline void
+rds_transition_frwr_state(struct rds_ib_mr *ibmr,
+ enum rds_ib_fr_state old_state,
+ enum rds_ib_fr_state new_state)
+{
+ if (cmpxchg(&ibmr->u.frmr.fr_state,
+ old_state, new_state) == old_state &&
+ old_state == FRMR_IS_INUSE) {
+ /* enforce order of ibmr->u.frmr.fr_state update
+ * before decrementing i_fastreg_inuse_count
+ */
+ smp_mb__before_atomic();
+ atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
+ if (waitqueue_active(&rds_ib_ring_empty_wait))
+ wake_up(&rds_ib_ring_empty_wait);
+ }
+}
+
static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
int npages)
{
@@ -75,6 +93,8 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
pool->max_items_soft = pool->max_items;
frmr->fr_state = FRMR_IS_FREE;
+ init_waitqueue_head(&frmr->fr_inv_done);
+ init_waitqueue_head(&frmr->fr_reg_done);
return ibmr;
out_no_cigar:
@@ -116,13 +136,19 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
if (unlikely(ret != ibmr->sg_len))
return ret < 0 ? ret : -EINVAL;
+ if (cmpxchg(&frmr->fr_state,
+ FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
+ return -EBUSY;
+
+ atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
+
/* Perform a WR for the fast_reg_mr. Each individual page
* in the sg list is added to the fast reg page list and placed
* inside the fast_reg_mr WR. The key used is a rolling 8bit
* counter, which should guarantee uniqueness.
*/
ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
- frmr->fr_state = FRMR_IS_INUSE;
+ frmr->fr_reg = true;
memset(&reg_wr, 0, sizeof(reg_wr));
reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
@@ -138,12 +164,23 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
if (unlikely(ret)) {
/* Failure here can be because of -ENOMEM as well */
- frmr->fr_state = FRMR_IS_STALE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
+
atomic_inc(&ibmr->ic->i_fastreg_wrs);
if (printk_ratelimit())
pr_warn("RDS/IB: %s returned error(%d)\n",
__func__, ret);
+ goto out;
}
+
+ /* Wait for the registration to complete in order to prevent an invalid
+ * access error resulting from a race between the memory region already
+ * being accessed while registration is still pending.
+ */
+ wait_event(frmr->fr_reg_done, !frmr->fr_reg);
+
+out:
+
return ret;
}
@@ -239,8 +276,8 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
if (frmr->fr_state != FRMR_IS_INUSE)
goto out;
- while (atomic_dec_return(&ibmr->ic->i_fastunreg_wrs) <= 0) {
- atomic_inc(&ibmr->ic->i_fastunreg_wrs);
+ while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
+ atomic_inc(&ibmr->ic->i_fastreg_wrs);
cpu_relax();
}
@@ -255,12 +292,29 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
if (unlikely(ret)) {
- frmr->fr_state = FRMR_IS_STALE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
frmr->fr_inv = false;
- atomic_inc(&ibmr->ic->i_fastunreg_wrs);
+ /* enforce order of frmr->fr_inv update
+ * before incrementing i_fastreg_wrs
+ */
+ smp_mb__before_atomic();
+ atomic_inc(&ibmr->ic->i_fastreg_wrs);
pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
goto out;
}
+
+ /* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
+ * 1) avoid a silly bouncing between "clean_list" and "drop_list"
+ * triggered by function "rds_ib_reg_frmr" as it is releases frmr
+ * regions whose state is not "FRMR_IS_FREE" right away.
+ * 2) prevents an invalid access error in a race
+ * from a pending "IB_WR_LOCAL_INV" operation
+ * with a teardown ("dma_unmap_sg", "put_page")
+ * and de-registration ("ib_dereg_mr") of the corresponding
+ * memory region.
+ */
+ wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
+
out:
return ret;
}
@@ -271,7 +325,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
struct rds_ib_frmr *frmr = &ibmr->u.frmr;
if (wc->status != IB_WC_SUCCESS) {
- frmr->fr_state = FRMR_IS_STALE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
if (rds_conn_up(ic->conn))
rds_ib_conn_error(ic->conn,
"frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
@@ -283,12 +337,21 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
}
if (frmr->fr_inv) {
- frmr->fr_state = FRMR_IS_FREE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
frmr->fr_inv = false;
- atomic_inc(&ic->i_fastreg_wrs);
- } else {
- atomic_inc(&ic->i_fastunreg_wrs);
+ wake_up(&frmr->fr_inv_done);
}
+
+ if (frmr->fr_reg) {
+ frmr->fr_reg = false;
+ wake_up(&frmr->fr_reg_done);
+ }
+
+ /* enforce order of frmr->{fr_reg,fr_inv} update
+ * before incrementing i_fastreg_wrs
+ */
+ smp_mb__before_atomic();
+ atomic_inc(&ic->i_fastreg_wrs);
}
void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
@@ -296,14 +359,18 @@ void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
{
struct rds_ib_mr *ibmr, *next;
struct rds_ib_frmr *frmr;
- int ret = 0;
+ int ret = 0, ret2;
unsigned int freed = *nfreed;
/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
list_for_each_entry(ibmr, list, unmap_list) {
- if (ibmr->sg_dma_len)
- ret |= rds_ib_post_inv(ibmr);
+ if (ibmr->sg_dma_len) {
+ ret2 = rds_ib_post_inv(ibmr);
+ if (ret2 && !ret)
+ ret = ret2;
+ }
}
+
if (ret)
pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 5da12c248431..9045a8c0edff 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -57,6 +57,9 @@ struct rds_ib_frmr {
struct ib_mr *mr;
enum rds_ib_fr_state fr_state;
bool fr_inv;
+ wait_queue_head_t fr_inv_done;
+ bool fr_reg;
+ wait_queue_head_t fr_reg_done;
struct ib_send_wr fr_wr;
unsigned int dma_npages;
unsigned int sg_byte_len;
@@ -97,6 +100,7 @@ struct rds_ib_mr_pool {
struct llist_head free_list; /* unused MRs */
struct llist_head clean_list; /* unused & unmapped MRs */
wait_queue_head_t flush_wait;
+ spinlock_t clean_lock; /* "clean_list" concurrency */
atomic_t free_pinned; /* memory pinned by free MRs */
unsigned long max_items;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 0b347f46b2f4..c8c1e3ae8d84 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -40,9 +40,6 @@
struct workqueue_struct *rds_ib_mr_wq;
-static DEFINE_PER_CPU(unsigned long, clean_list_grace);
-#define CLEAN_LIST_BUSY_BIT 0
-
static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
{
struct rds_ib_device *rds_ibdev;
@@ -195,12 +192,11 @@ struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
{
struct rds_ib_mr *ibmr = NULL;
struct llist_node *ret;
- unsigned long *flag;
+ unsigned long flags;
- preempt_disable();
- flag = this_cpu_ptr(&clean_list_grace);
- set_bit(CLEAN_LIST_BUSY_BIT, flag);
+ spin_lock_irqsave(&pool->clean_lock, flags);
ret = llist_del_first(&pool->clean_list);
+ spin_unlock_irqrestore(&pool->clean_lock, flags);
if (ret) {
ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
if (pool->pool_type == RDS_IB_MR_8K_POOL)
@@ -209,23 +205,9 @@ struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
rds_ib_stats_inc(s_ib_rdma_mr_1m_reused);
}
- clear_bit(CLEAN_LIST_BUSY_BIT, flag);
- preempt_enable();
return ibmr;
}
-static inline void wait_clean_list_grace(void)
-{
- int cpu;
- unsigned long *flag;
-
- for_each_online_cpu(cpu) {
- flag = &per_cpu(clean_list_grace, cpu);
- while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
- cpu_relax();
- }
-}
-
void rds_ib_sync_mr(void *trans_private, int direction)
{
struct rds_ib_mr *ibmr = trans_private;
@@ -324,8 +306,7 @@ static unsigned int llist_append_to_list(struct llist_head *llist,
* of clusters. Each cluster has linked llist nodes of
* MR_CLUSTER_SIZE mrs that are ready for reuse.
*/
-static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
- struct list_head *list,
+static void list_to_llist_nodes(struct list_head *list,
struct llist_node **nodes_head,
struct llist_node **nodes_tail)
{
@@ -402,8 +383,13 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
*/
dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list);
dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list);
- if (free_all)
+ if (free_all) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->clean_lock, flags);
llist_append_to_list(&pool->clean_list, &unmap_list);
+ spin_unlock_irqrestore(&pool->clean_lock, flags);
+ }
free_goal = rds_ib_flush_goal(pool, free_all);
@@ -416,27 +402,20 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal);
if (!list_empty(&unmap_list)) {
- /* we have to make sure that none of the things we're about
- * to put on the clean list would race with other cpus trying
- * to pull items off. The llist would explode if we managed to
- * remove something from the clean list and then add it back again
- * while another CPU was spinning on that same item in llist_del_first.
- *
- * This is pretty unlikely, but just in case wait for an llist grace period
- * here before adding anything back into the clean list.
- */
- wait_clean_list_grace();
-
- list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail);
+ unsigned long flags;
+
+ list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail);
if (ibmr_ret) {
*ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
clean_nodes = clean_nodes->next;
}
/* more than one entry in llist nodes */
- if (clean_nodes)
+ if (clean_nodes) {
+ spin_lock_irqsave(&pool->clean_lock, flags);
llist_add_batch(clean_nodes, clean_tail,
&pool->clean_list);
-
+ spin_unlock_irqrestore(&pool->clean_lock, flags);
+ }
}
atomic_sub(unpinned, &pool->free_pinned);
@@ -471,7 +450,7 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
else
rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
- return ERR_PTR(-EAGAIN);
+ break;
}
/* We do have some empty MRs. Flush them out. */
@@ -485,7 +464,7 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
return ibmr;
}
- return ibmr;
+ return NULL;
}
static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
@@ -610,6 +589,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
init_llist_head(&pool->free_list);
init_llist_head(&pool->drop_list);
init_llist_head(&pool->clean_list);
+ spin_lock_init(&pool->clean_lock);
mutex_init(&pool->flush_lock);
init_waitqueue_head(&pool->flush_wait);
INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 18f2341202f8..dfe6237dafe2 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -69,6 +69,16 @@ static void rds_ib_send_complete(struct rds_message *rm,
complete(rm, notify_status);
}
+static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
+ struct rm_data_op *op,
+ int wc_status)
+{
+ if (op->op_nents)
+ ib_dma_unmap_sg(ic->i_cm_id->device,
+ op->op_sg, op->op_nents,
+ DMA_TO_DEVICE);
+}
+
static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
struct rm_rdma_op *op,
int wc_status)
@@ -129,21 +139,6 @@ static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
rds_ib_stats_inc(s_ib_atomic_fadd);
}
-static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
- struct rm_data_op *op,
- int wc_status)
-{
- struct rds_message *rm = container_of(op, struct rds_message, data);
-
- if (op->op_nents)
- ib_dma_unmap_sg(ic->i_cm_id->device,
- op->op_sg, op->op_nents,
- DMA_TO_DEVICE);
-
- if (rm->rdma.op_active && rm->data.op_notify)
- rds_ib_send_unmap_rdma(ic, &rm->rdma, wc_status);
-}
-
/*
* Unmap the resources associated with a struct send_work.
*
@@ -902,7 +897,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
send->s_queued = jiffies;
send->s_op = NULL;
- nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
+ if (!op->op_notify)
+ nr_sig += rds_ib_set_wr_signal_state(ic, send,
+ op->op_notify);
send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
send->s_rdma_wr.remote_addr = remote_addr;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index b340ed4fc43a..916f5ec373d8 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -641,16 +641,6 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
}
op->op_notifier->n_user_token = args->user_token;
op->op_notifier->n_status = RDS_RDMA_SUCCESS;
-
- /* Enable rmda notification on data operation for composite
- * rds messages and make sure notification is enabled only
- * for the data operation which follows it so that application
- * gets notified only after full message gets delivered.
- */
- if (rm->data.op_sg) {
- rm->rdma.op_notify = 0;
- rm->data.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
- }
}
/* The cookie contains the R_Key of the remote memory region, and
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 46bce8389066..9986d6065c4d 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -105,24 +105,28 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_ESTABLISHED:
- trans->cm_connect_complete(conn, event);
+ if (conn)
+ trans->cm_connect_complete(conn, event);
break;
case RDMA_CM_EVENT_REJECTED:
if (!conn)
break;
err = (int *)rdma_consumer_reject_data(cm_id, event, &len);
- if (!err || (err && ((*err) == RDS_RDMA_REJ_INCOMPAT))) {
+ if (!err ||
+ (err && len >= sizeof(*err) &&
+ ((*err) <= RDS_RDMA_REJ_INCOMPAT))) {
pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n",
&conn->c_laddr, &conn->c_faddr);
- conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
- conn->c_tos = 0;
+
+ if (!conn->c_tos)
+ conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
+
rds_conn_drop(conn);
}
rdsdebug("Connection rejected: %s\n",
rdma_reject_msg(cm_id, event->status));
break;
- /* FALLTHROUGH */
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
@@ -134,6 +138,8 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_DISCONNECTED:
+ if (!conn)
+ break;
rdsdebug("DISCONNECT event - dropping connection "
"%pI6c->%pI6c\n", &conn->c_laddr,
&conn->c_faddr);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 0d8f67cadd74..f0066d168499 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -476,7 +476,6 @@ struct rds_message {
} rdma;
struct rm_data_op {
unsigned int op_active:1;
- unsigned int op_notify:1;
unsigned int op_nents;
unsigned int op_count;
unsigned int op_dmasg;
diff --git a/net/rds/send.c b/net/rds/send.c
index 166dd578c1cc..031b1e97a466 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -491,14 +491,12 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
struct rm_rdma_op *ro;
struct rds_notifier *notifier;
unsigned long flags;
- unsigned int notify = 0;
spin_lock_irqsave(&rm->m_rs_lock, flags);
- notify = rm->rdma.op_notify | rm->data.op_notify;
ro = &rm->rdma;
if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
- ro->op_active && notify && ro->op_notifier) {
+ ro->op_active && ro->op_notify && ro->op_notifier) {
notifier = ro->op_notifier;
rs = rm->m_rs;
sock_hold(rds_rs_to_sk(rs));
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index d09eaf153544..0dbbfd1b6487 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -193,7 +193,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
service_in_use:
write_unlock(&local->services_lock);
- rxrpc_put_local(local);
+ rxrpc_unuse_local(local);
ret = -EADDRINUSE;
error_unlock:
release_sock(&rx->sk);
@@ -402,7 +402,7 @@ EXPORT_SYMBOL(rxrpc_kernel_check_life);
*/
void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call)
{
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
rxrpc_propose_ack_ping_for_check_life);
rxrpc_send_ack_packet(call, true, NULL);
}
@@ -901,7 +901,7 @@ static int rxrpc_release_sock(struct sock *sk)
rxrpc_queue_work(&rxnet->service_conn_reaper);
rxrpc_queue_work(&rxnet->client_conn_reaper);
- rxrpc_put_local(rx->local);
+ rxrpc_unuse_local(rx->local);
rx->local = NULL;
key_put(rx->key);
rx->key = NULL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 80335b4ee4fd..145335611af6 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -254,7 +254,8 @@ struct rxrpc_security {
*/
struct rxrpc_local {
struct rcu_head rcu;
- atomic_t usage;
+ atomic_t active_users; /* Number of users of the local endpoint */
+ atomic_t usage; /* Number of references to the structure */
struct rxrpc_net *rxnet; /* The network ns in which this resides */
struct list_head link;
struct socket *socket; /* my UDP socket */
@@ -649,7 +650,6 @@ struct rxrpc_call {
/* receive-phase ACK management */
u8 ackr_reason; /* reason to ACK */
- u16 ackr_skew; /* skew on packet being ACK'd */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
rxrpc_serial_t ackr_first_seq; /* first sequence number received */
rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
@@ -743,7 +743,7 @@ int rxrpc_reject_call(struct rxrpc_sock *);
/*
* call_event.c
*/
-void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
+void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool,
enum rxrpc_propose_ack_trace);
void rxrpc_process_call(struct work_struct *);
@@ -1002,6 +1002,8 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc
struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *);
struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *);
void rxrpc_put_local(struct rxrpc_local *);
+struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *);
+void rxrpc_unuse_local(struct rxrpc_local *);
void rxrpc_queue_local(struct rxrpc_local *);
void rxrpc_destroy_all_locals(struct rxrpc_net *);
@@ -1061,6 +1063,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *);
struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *);
struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *);
void rxrpc_put_peer(struct rxrpc_peer *);
+void rxrpc_put_peer_locked(struct rxrpc_peer *);
/*
* proc.c
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index bc2adeb3acb9..c767679bfa5d 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -43,8 +43,7 @@ static void rxrpc_propose_ping(struct rxrpc_call *call,
* propose an ACK be sent
*/
static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
- u16 skew, u32 serial, bool immediate,
- bool background,
+ u32 serial, bool immediate, bool background,
enum rxrpc_propose_ack_trace why)
{
enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use;
@@ -69,14 +68,12 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) {
outcome = rxrpc_propose_ack_update;
call->ackr_serial = serial;
- call->ackr_skew = skew;
}
if (!immediate)
goto trace;
} else if (prior > rxrpc_ack_priority[call->ackr_reason]) {
call->ackr_reason = ack_reason;
call->ackr_serial = serial;
- call->ackr_skew = skew;
} else {
outcome = rxrpc_propose_ack_subsume;
}
@@ -137,11 +134,11 @@ trace:
* propose an ACK be sent, locking the call structure
*/
void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
- u16 skew, u32 serial, bool immediate, bool background,
+ u32 serial, bool immediate, bool background,
enum rxrpc_propose_ack_trace why)
{
spin_lock_bh(&call->lock);
- __rxrpc_propose_ACK(call, ack_reason, skew, serial,
+ __rxrpc_propose_ACK(call, ack_reason, serial,
immediate, background, why);
spin_unlock_bh(&call->lock);
}
@@ -239,7 +236,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
ack_ts = ktime_sub(now, call->acks_latest_ts);
if (ktime_to_ns(ack_ts) < call->peer->rtt)
goto out;
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
rxrpc_propose_ack_ping_for_lost_ack);
rxrpc_send_ack_packet(call, true, NULL);
goto out;
@@ -372,7 +369,7 @@ recheck_state:
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now);
cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET);
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true,
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, true,
rxrpc_propose_ack_ping_for_keepalive);
set_bit(RXRPC_CALL_EV_PING, &call->events);
}
@@ -407,7 +404,7 @@ recheck_state:
send_ack = NULL;
if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) {
call->acks_lost_top = call->tx_top;
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
rxrpc_propose_ack_ping_for_lost_ack);
send_ack = &call->acks_lost_ping;
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 5bd6f1546e5c..dd47d465d1d3 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -196,15 +196,14 @@ send_extra_data:
* Ping the other end to fill our RTT cache and to retrieve the rwind
* and MTU parameters.
*/
-static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb,
- int skew)
+static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
ktime_t now = skb->tstamp;
if (call->peer->rtt_usage < 3 ||
ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
true, true,
rxrpc_propose_ack_ping_for_params);
}
@@ -419,8 +418,7 @@ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq,
/*
* Process a DATA packet, adding the packet to the Rx ring.
*/
-static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
- u16 skew)
+static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
enum rxrpc_call_state state;
@@ -600,11 +598,11 @@ skip:
ack:
if (ack)
- rxrpc_propose_ACK(call, ack, skew, ack_serial,
+ rxrpc_propose_ACK(call, ack, ack_serial,
immediate_ack, true,
rxrpc_propose_ack_input_data);
else
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial,
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial,
false, true,
rxrpc_propose_ack_input_data);
@@ -822,8 +820,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
* soft-ACK means that the packet may be discarded and retransmission
* requested. A phase is complete when all packets are hard-ACK'd.
*/
-static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
- u16 skew)
+static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_ack_summary summary = { 0 };
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -867,11 +864,11 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
if (buf.ack.reason == RXRPC_ACK_PING) {
_proto("Rx ACK %%%u PING Request", sp->hdr.serial);
rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
- skew, sp->hdr.serial, true, true,
+ sp->hdr.serial, true, true,
rxrpc_propose_ack_respond_to_ping);
} else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED,
- skew, sp->hdr.serial, true, true,
+ sp->hdr.serial, true, true,
rxrpc_propose_ack_respond_to_ack);
}
@@ -948,7 +945,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
RXRPC_TX_ANNO_LAST &&
summary.nr_acks == call->tx_top - hard_ack &&
rxrpc_is_client_call(call))
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
false, true,
rxrpc_propose_ack_ping_for_lost_reply);
@@ -1004,7 +1001,7 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
* Process an incoming call packet.
*/
static void rxrpc_input_call_packet(struct rxrpc_call *call,
- struct sk_buff *skb, u16 skew)
+ struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned long timo;
@@ -1023,11 +1020,11 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_DATA:
- rxrpc_input_data(call, skb, skew);
+ rxrpc_input_data(call, skb);
break;
case RXRPC_PACKET_TYPE_ACK:
- rxrpc_input_ack(call, skb, skew);
+ rxrpc_input_ack(call, skb);
break;
case RXRPC_PACKET_TYPE_BUSY:
@@ -1108,8 +1105,12 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
{
_enter("%p,%p", local, skb);
- skb_queue_tail(&local->event_queue, skb);
- rxrpc_queue_local(local);
+ if (rxrpc_get_local_maybe(local)) {
+ skb_queue_tail(&local->event_queue, skb);
+ rxrpc_queue_local(local);
+ } else {
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+ }
}
/*
@@ -1119,8 +1120,12 @@ static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
CHECK_SLAB_OKAY(&local->usage);
- skb_queue_tail(&local->reject_queue, skb);
- rxrpc_queue_local(local);
+ if (rxrpc_get_local_maybe(local)) {
+ skb_queue_tail(&local->reject_queue, skb);
+ rxrpc_queue_local(local);
+ } else {
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+ }
}
/*
@@ -1173,7 +1178,6 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
struct rxrpc_peer *peer = NULL;
struct rxrpc_sock *rx = NULL;
unsigned int channel;
- int skew = 0;
_enter("%p", udp_sk);
@@ -1301,15 +1305,8 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
goto out;
}
- /* Note the serial number skew here */
- skew = (int)sp->hdr.serial - (int)conn->hi_serial;
- if (skew >= 0) {
- if (skew > 0)
- conn->hi_serial = sp->hdr.serial;
- } else {
- skew = -skew;
- skew = min(skew, 65535);
- }
+ if ((int)sp->hdr.serial - (int)conn->hi_serial > 0)
+ conn->hi_serial = sp->hdr.serial;
/* Call-bound packets are routed by connection channel. */
channel = sp->hdr.cid & RXRPC_CHANNELMASK;
@@ -1372,11 +1369,11 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
call = rxrpc_new_incoming_call(local, rx, skb);
if (!call)
goto reject_packet;
- rxrpc_send_ping(call, skb, skew);
+ rxrpc_send_ping(call, skb);
mutex_unlock(&call->user_mutex);
}
- rxrpc_input_call_packet(call, skb, skew);
+ rxrpc_input_call_packet(call, skb);
goto discard;
discard:
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index b1c71bad510b..72a6e12a9304 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -79,6 +79,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
if (local) {
atomic_set(&local->usage, 1);
+ atomic_set(&local->active_users, 1);
local->rxnet = rxnet;
INIT_LIST_HEAD(&local->link);
INIT_WORK(&local->processor, rxrpc_local_processor);
@@ -92,7 +93,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
local->debug_id = atomic_inc_return(&rxrpc_debug_id);
memcpy(&local->srx, srx, sizeof(*srx));
local->srx.srx_service = 0;
- trace_rxrpc_local(local, rxrpc_local_new, 1, NULL);
+ trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL);
}
_leave(" = %p", local);
@@ -266,11 +267,8 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
* bind the transport socket may still fail if we're attempting
* to use a local address that the dying object is still using.
*/
- if (!rxrpc_get_local_maybe(local)) {
- cursor = cursor->next;
- list_del_init(&local->link);
+ if (!rxrpc_use_local(local))
break;
- }
age = "old";
goto found;
@@ -284,7 +282,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
if (ret < 0)
goto sock_error;
- list_add_tail(&local->link, cursor);
+ if (cursor != &rxnet->local_endpoints)
+ list_replace_init(cursor, &local->link);
+ else
+ list_add_tail(&local->link, cursor);
age = "new";
found:
@@ -320,7 +321,7 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
int n;
n = atomic_inc_return(&local->usage);
- trace_rxrpc_local(local, rxrpc_local_got, n, here);
+ trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here);
return local;
}
@@ -334,7 +335,8 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
if (local) {
int n = atomic_fetch_add_unless(&local->usage, 1, 0);
if (n > 0)
- trace_rxrpc_local(local, rxrpc_local_got, n + 1, here);
+ trace_rxrpc_local(local->debug_id, rxrpc_local_got,
+ n + 1, here);
else
local = NULL;
}
@@ -342,24 +344,18 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
}
/*
- * Queue a local endpoint.
+ * Queue a local endpoint and pass the caller's reference to the work item.
*/
void rxrpc_queue_local(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
+ unsigned int debug_id = local->debug_id;
+ int n = atomic_read(&local->usage);
if (rxrpc_queue_work(&local->processor))
- trace_rxrpc_local(local, rxrpc_local_queued,
- atomic_read(&local->usage), here);
-}
-
-/*
- * A local endpoint reached its end of life.
- */
-static void __rxrpc_put_local(struct rxrpc_local *local)
-{
- _enter("%d", local->debug_id);
- rxrpc_queue_work(&local->processor);
+ trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here);
+ else
+ rxrpc_put_local(local);
}
/*
@@ -372,10 +368,47 @@ void rxrpc_put_local(struct rxrpc_local *local)
if (local) {
n = atomic_dec_return(&local->usage);
- trace_rxrpc_local(local, rxrpc_local_put, n, here);
+ trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here);
if (n == 0)
- __rxrpc_put_local(local);
+ call_rcu(&local->rcu, rxrpc_local_rcu);
+ }
+}
+
+/*
+ * Start using a local endpoint.
+ */
+struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local)
+{
+ unsigned int au;
+
+ local = rxrpc_get_local_maybe(local);
+ if (!local)
+ return NULL;
+
+ au = atomic_fetch_add_unless(&local->active_users, 1, 0);
+ if (au == 0) {
+ rxrpc_put_local(local);
+ return NULL;
+ }
+
+ return local;
+}
+
+/*
+ * Cease using a local endpoint. Once the number of active users reaches 0, we
+ * start the closure of the transport in the work processor.
+ */
+void rxrpc_unuse_local(struct rxrpc_local *local)
+{
+ unsigned int au;
+
+ if (local) {
+ au = atomic_dec_return(&local->active_users);
+ if (au == 0)
+ rxrpc_queue_local(local);
+ else
+ rxrpc_put_local(local);
}
}
@@ -393,16 +426,6 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
_enter("%d", local->debug_id);
- /* We can get a race between an incoming call packet queueing the
- * processor again and the work processor starting the destruction
- * process which will shut down the UDP socket.
- */
- if (local->dead) {
- _leave(" [already dead]");
- return;
- }
- local->dead = true;
-
mutex_lock(&rxnet->local_mutex);
list_del_init(&local->link);
mutex_unlock(&rxnet->local_mutex);
@@ -422,13 +445,11 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
*/
rxrpc_purge_queue(&local->reject_queue);
rxrpc_purge_queue(&local->event_queue);
-
- _debug("rcu local %d", local->debug_id);
- call_rcu(&local->rcu, rxrpc_local_rcu);
}
/*
- * Process events on an endpoint
+ * Process events on an endpoint. The work item carries a ref which
+ * we must release.
*/
static void rxrpc_local_processor(struct work_struct *work)
{
@@ -436,13 +457,15 @@ static void rxrpc_local_processor(struct work_struct *work)
container_of(work, struct rxrpc_local, processor);
bool again;
- trace_rxrpc_local(local, rxrpc_local_processing,
+ trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
atomic_read(&local->usage), NULL);
do {
again = false;
- if (atomic_read(&local->usage) == 0)
- return rxrpc_local_destroyer(local);
+ if (atomic_read(&local->active_users) == 0) {
+ rxrpc_local_destroyer(local);
+ break;
+ }
if (!skb_queue_empty(&local->reject_queue)) {
rxrpc_reject_packets(local);
@@ -454,6 +477,8 @@ static void rxrpc_local_processor(struct work_struct *work)
again = true;
}
} while (again);
+
+ rxrpc_put_local(local);
}
/*
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 948e3fe249ec..369e516c4bdf 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -87,7 +87,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
*_top = top;
pkt->ack.bufferSpace = htons(8);
- pkt->ack.maxSkew = htons(call->ackr_skew);
+ pkt->ack.maxSkew = htons(0);
pkt->ack.firstPacket = htonl(hard_ack + 1);
pkt->ack.previousPacket = htonl(call->ackr_prev_seq);
pkt->ack.serial = htonl(serial);
@@ -228,7 +228,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
if (ping)
clear_bit(RXRPC_CALL_PINGING, &call->flags);
rxrpc_propose_ACK(call, pkt->ack.reason,
- ntohs(pkt->ack.maxSkew),
ntohl(pkt->ack.serial),
false, true,
rxrpc_propose_ack_retry_tx);
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 9f2f45c09e58..7666ec72d37e 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -378,7 +378,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
spin_lock_bh(&rxnet->peer_hash_lock);
list_add_tail(&peer->keepalive_link,
&rxnet->peer_keepalive[slot & mask]);
- rxrpc_put_peer(peer);
+ rxrpc_put_peer_locked(peer);
}
spin_unlock_bh(&rxnet->peer_hash_lock);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 9d3ce81cf8ae..9c3ac96f71cb 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -437,6 +437,24 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
}
/*
+ * Drop a ref on a peer record where the caller already holds the
+ * peer_hash_lock.
+ */
+void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
+{
+ const void *here = __builtin_return_address(0);
+ int n;
+
+ n = atomic_dec_return(&peer->usage);
+ trace_rxrpc_peer(peer, rxrpc_peer_put, n, here);
+ if (n == 0) {
+ hash_del_rcu(&peer->hash_link);
+ list_del_init(&peer->keepalive_link);
+ kfree_rcu(peer, rcu);
+ }
+}
+
+/*
* Make sure all peer records have been discarded.
*/
void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 5abf46cf9e6c..9a7e1bc9791d 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -141,7 +141,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
ASSERTCMP(call->rx_hard_ack, ==, call->rx_top);
if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
- rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, false, true,
+ rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, false, true,
rxrpc_propose_ack_terminal_ack);
//rxrpc_send_ack_packet(call, false, NULL);
}
@@ -159,7 +159,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
write_unlock_bh(&call->state_lock);
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true,
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false, true,
rxrpc_propose_ack_processing_op);
break;
default:
@@ -212,7 +212,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
if (after_eq(hard_ack, call->ackr_consumed + 2) ||
after_eq(top, call->ackr_seen + 2) ||
(hard_ack == top && after(hard_ack, call->ackr_consumed)))
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial,
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial,
true, true,
rxrpc_propose_ack_rotate_rx);
if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY)
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 5d3f33ce6d41..bae14438f869 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -226,6 +226,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
rxrpc_set_call_completion(call,
RXRPC_CALL_LOCAL_ERROR,
0, ret);
+ rxrpc_notify_socket(call);
goto out;
}
_debug("need instant resend %d", ret);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 1e3fa67d91aa..2bbb38161851 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -11,7 +11,6 @@
#include "ar-internal.h"
static struct ctl_table_header *rxrpc_sysctl_reg_table;
-static const unsigned int one = 1;
static const unsigned int four = 4;
static const unsigned int thirtytwo = 32;
static const unsigned int n_65535 = 65535;
@@ -97,7 +96,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&one,
+ .extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&rxrpc_max_client_connections,
},
{
@@ -115,7 +114,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&one,
+ .extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&n_max_acks,
},
{
@@ -124,7 +123,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&one,
+ .extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&n_65535,
},
{
@@ -133,7 +132,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)&one,
+ .extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&four,
},
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index dd55b9ac3a66..afd2ba157a13 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -942,7 +942,7 @@ config NET_ACT_TUNNEL_KEY
config NET_ACT_CT
tristate "connection tracking tc action"
- depends on NET_CLS_ACT && NF_CONNTRACK
+ depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
help
Say Y here to allow sending the packets to conntrack module.
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 8126b26f125e..fd1f7e799e23 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -285,6 +285,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct tcf_bpf *prog;
bool is_bpf, is_ebpf;
int ret, res = 0;
+ u32 index;
if (!nla)
return -EINVAL;
@@ -298,13 +299,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
-
- ret = tcf_idr_check_alloc(tn, &parm->index, act, bind);
+ index = parm->index;
+ ret = tcf_idr_check_alloc(tn, &index, act, bind);
if (!ret) {
- ret = tcf_idr_create(tn, parm->index, est, act,
+ ret = tcf_idr_create(tn, index, est, act,
&act_bpf_ops, bind, true);
if (ret < 0) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index ce36b0f7e1dc..32ac04d77a45 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -103,6 +103,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct tcf_connmark_info *ci;
struct tc_connmark *parm;
int ret = 0, err;
+ u32 index;
if (!nla)
return -EINVAL;
@@ -116,13 +117,13 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
-
- ret = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ ret = tcf_idr_check_alloc(tn, &index, a, bind);
if (!ret) {
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_connmark_ops, bind, false);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 621fb22ce2a9..9b9288267a54 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -52,6 +52,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct tc_csum *parm;
struct tcf_csum *p;
int ret = 0, err;
+ u32 index;
if (nla == NULL)
return -EINVAL;
@@ -64,13 +65,13 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
if (tb[TCA_CSUM_PARMS] == NULL)
return -EINVAL;
parm = nla_data(tb[TCA_CSUM_PARMS]);
-
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_csum_ops, bind, true);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
ret = ACT_P_CREATED;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index b501ce0cf116..33a1a7406e87 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -666,6 +666,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
struct tc_ct *parm;
struct tcf_ct *c;
int err, res = 0;
+ u32 index;
if (!nla) {
NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed");
@@ -681,16 +682,16 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
return -EINVAL;
}
parm = nla_data(tb[TCA_CT_PARMS]);
-
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
return err;
if (!err) {
- err = tcf_idr_create(tn, parm->index, est, a,
+ err = tcf_idr_create(tn, index, est, a,
&act_ct_ops, bind, true);
if (err) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return err;
}
res = ACT_P_CREATED;
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 10eb2bb99861..06ef74b74911 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -157,10 +157,10 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+ u32 dscpmask = 0, dscpstatemask, index;
struct nlattr *tb[TCA_CTINFO_MAX + 1];
struct tcf_ctinfo_params *cp_new;
struct tcf_chain *goto_ch = NULL;
- u32 dscpmask = 0, dscpstatemask;
struct tc_ctinfo *actparm;
struct tcf_ctinfo *ci;
u8 dscpmaskshift;
@@ -206,12 +206,13 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
}
/* done the validation:now to the actual action allocation */
- err = tcf_idr_check_alloc(tn, &actparm->index, a, bind);
+ index = actparm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, actparm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_ctinfo_ops, bind, false);
if (ret) {
- tcf_idr_cleanup(tn, actparm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
ret = ACT_P_CREATED;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b2380c5284e6..8f0140c6ca58 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -61,6 +61,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct tc_gact *parm;
struct tcf_gact *gact;
int ret = 0;
+ u32 index;
int err;
#ifdef CONFIG_GACT_PROB
struct tc_gact_p *p_parm = NULL;
@@ -77,6 +78,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
if (tb[TCA_GACT_PARMS] == NULL)
return -EINVAL;
parm = nla_data(tb[TCA_GACT_PARMS]);
+ index = parm->index;
#ifndef CONFIG_GACT_PROB
if (tb[TCA_GACT_PROB] != NULL)
@@ -94,12 +96,12 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_gact_ops, bind, true);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
ret = ACT_P_CREATED;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 41d5398dd2f2..92ee853d43e6 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -479,8 +479,14 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
u8 *saddr = NULL;
bool exists = false;
int ret = 0;
+ u32 index;
int err;
+ if (!nla) {
+ NL_SET_ERR_MSG_MOD(extack, "IFE requires attributes to be passed");
+ return -EINVAL;
+ }
+
err = nla_parse_nested_deprecated(tb, TCA_IFE_MAX, nla, ife_policy,
NULL);
if (err < 0)
@@ -502,7 +508,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (!p)
return -ENOMEM;
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0) {
kfree(p);
return err;
@@ -514,10 +521,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
+ ret = tcf_idr_create(tn, index, est, a, &act_ife_ops,
bind, true);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
kfree(p);
return ret;
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 055faa298c8e..be3f88dfc37e 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -104,6 +104,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct net_device *dev;
bool exists = false;
int ret, err;
+ u32 index;
if (!nla) {
NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
@@ -118,8 +119,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
return -EINVAL;
}
parm = nla_data(tb[TCA_MIRRED_PARMS]);
-
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
return err;
exists = err;
@@ -136,21 +137,21 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (exists)
tcf_idr_release(*a, bind);
else
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
return -EINVAL;
}
if (!exists) {
if (!parm->ifindex) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
return -EINVAL;
}
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_mirred_ops, bind, true);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
ret = ACT_P_CREATED;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index ca2597ce4ac9..0f299e3b618c 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -138,6 +138,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
struct tcf_mpls *m;
int ret = 0, err;
u8 mpls_ttl = 0;
+ u32 index;
if (!nla) {
NL_SET_ERR_MSG_MOD(extack, "Missing netlink attributes");
@@ -153,6 +154,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
return -EINVAL;
}
parm = nla_data(tb[TCA_MPLS_PARMS]);
+ index = parm->index;
/* Verify parameters against action type. */
switch (parm->m_action) {
@@ -209,7 +211,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
return -EINVAL;
}
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
return err;
exists = err;
@@ -217,10 +219,10 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
return 0;
if (!exists) {
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_mpls_ops, bind, true);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 45923ebb7a4f..7b858c11b1b5 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -44,6 +44,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
struct tc_nat *parm;
int ret = 0, err;
struct tcf_nat *p;
+ u32 index;
if (nla == NULL)
return -EINVAL;
@@ -56,13 +57,13 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
if (tb[TCA_NAT_PARMS] == NULL)
return -EINVAL;
parm = nla_data(tb[TCA_NAT_PARMS]);
-
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_nat_ops, bind, false);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
ret = ACT_P_CREATED;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 45e9d6bfddb3..17360c6faeaa 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -149,6 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct tcf_pedit *p;
int ret = 0, err;
int ksize;
+ u32 index;
if (!nla) {
NL_SET_ERR_MSG_MOD(extack, "Pedit requires attributes to be passed");
@@ -179,18 +180,19 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (IS_ERR(keys_ex))
return PTR_ERR(keys_ex);
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
if (!parm->nkeys) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
ret = -EINVAL;
goto out_free;
}
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_pedit_ops, bind, false);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
goto out_free;
}
ret = ACT_P_CREATED;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index a065f62fa79c..49cec3e64a4d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -57,6 +57,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
struct tc_action_net *tn = net_generic(net, police_net_id);
struct tcf_police_params *new;
bool exists = false;
+ u32 index;
if (nla == NULL)
return -EINVAL;
@@ -73,7 +74,8 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_POLICE_TBF]);
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
return err;
exists = err;
@@ -81,10 +83,10 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
return 0;
if (!exists) {
- ret = tcf_idr_create(tn, parm->index, NULL, a,
+ ret = tcf_idr_create(tn, index, NULL, a,
&act_police_ops, bind, true);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
ret = ACT_P_CREATED;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 274d7a0c0e25..595308d60133 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -41,8 +41,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
struct psample_group *psample_group;
+ u32 psample_group_num, rate, index;
struct tcf_chain *goto_ch = NULL;
- u32 psample_group_num, rate;
struct tc_sample *parm;
struct tcf_sample *s;
bool exists = false;
@@ -59,8 +59,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_SAMPLE_PARMS]);
-
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
return err;
exists = err;
@@ -68,10 +68,10 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
return 0;
if (!exists) {
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_sample_ops, bind, true);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
ret = ACT_P_CREATED;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index f28ddbabff76..33aefa25b545 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -95,6 +95,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct tcf_defact *d;
bool exists = false;
int ret = 0, err;
+ u32 index;
if (nla == NULL)
return -EINVAL;
@@ -108,7 +109,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_DEF_PARMS]);
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
return err;
exists = err;
@@ -119,15 +121,15 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
if (exists)
tcf_idr_release(*a, bind);
else
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return -EINVAL;
}
if (!exists) {
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_simp_ops, bind, false);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 215a06705cef..37dced00b63d 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -99,6 +99,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
u16 *queue_mapping = NULL, *ptype = NULL;
bool exists = false;
int ret = 0, err;
+ u32 index;
if (nla == NULL)
return -EINVAL;
@@ -146,8 +147,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
}
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
-
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
return err;
exists = err;
@@ -158,15 +159,15 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
if (exists)
tcf_idr_release(*a, bind);
else
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return -EINVAL;
}
if (!exists) {
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_skbedit_ops, bind, true);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
@@ -306,6 +307,17 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
return tcf_idr_search(tn, a, index);
}
+static size_t tcf_skbedit_get_fill_size(const struct tc_action *act)
+{
+ return nla_total_size(sizeof(struct tc_skbedit))
+ + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */
+ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */
+ + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */
+ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */
+ + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */
+ + nla_total_size_64bit(sizeof(u64)); /* TCA_SKBEDIT_FLAGS */
+}
+
static struct tc_action_ops act_skbedit_ops = {
.kind = "skbedit",
.id = TCA_ID_SKBEDIT,
@@ -315,6 +327,7 @@ static struct tc_action_ops act_skbedit_ops = {
.init = tcf_skbedit_init,
.cleanup = tcf_skbedit_cleanup,
.walk = tcf_skbedit_walker,
+ .get_fill_size = tcf_skbedit_get_fill_size,
.lookup = tcf_skbedit_search,
.size = sizeof(struct tcf_skbedit),
};
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 4f07706eff07..7da3518e18ef 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -87,12 +87,12 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct tcf_skbmod_params *p, *p_old;
struct tcf_chain *goto_ch = NULL;
struct tc_skbmod *parm;
+ u32 lflags = 0, index;
struct tcf_skbmod *d;
bool exists = false;
u8 *daddr = NULL;
u8 *saddr = NULL;
u16 eth_type = 0;
- u32 lflags = 0;
int ret = 0, err;
if (!nla)
@@ -122,10 +122,11 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
}
parm = nla_data(tb[TCA_SKBMOD_PARMS]);
+ index = parm->index;
if (parm->flags & SKBMOD_F_SWAPMAC)
lflags = SKBMOD_F_SWAPMAC;
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
return err;
exists = err;
@@ -136,15 +137,15 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (exists)
tcf_idr_release(*a, bind);
else
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return -EINVAL;
}
if (!exists) {
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_skbmod_ops, bind, true);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 10dffda1d5cc..6d0debdc9b97 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -225,6 +225,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
__be16 flags = 0;
u8 tos, ttl;
int ret = 0;
+ u32 index;
int err;
if (!nla) {
@@ -245,7 +246,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
}
parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
return err;
exists = err;
@@ -345,7 +347,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_tunnel_key_ops, bind, true);
if (ret) {
NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
@@ -403,7 +405,7 @@ err_out:
if (exists)
tcf_idr_release(*a, bind);
else
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 9269d350fb8a..a3c9eea1ee8a 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -116,6 +116,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
u8 push_prio = 0;
bool exists = false;
int ret = 0, err;
+ u32 index;
if (!nla)
return -EINVAL;
@@ -128,7 +129,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_VLAN_PARMS])
return -EINVAL;
parm = nla_data(tb[TCA_VLAN_PARMS]);
- err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+ index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
return err;
exists = err;
@@ -144,7 +146,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (exists)
tcf_idr_release(*a, bind);
else
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return -EINVAL;
}
push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
@@ -152,7 +154,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (exists)
tcf_idr_release(*a, bind);
else
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return -ERANGE;
}
@@ -166,7 +168,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (exists)
tcf_idr_release(*a, bind);
else
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return -EPROTONOSUPPORT;
}
} else {
@@ -180,16 +182,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (exists)
tcf_idr_release(*a, bind);
else
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return -EINVAL;
}
action = parm->v_action;
if (!exists) {
- ret = tcf_idr_create(tn, parm->index, est, a,
+ ret = tcf_idr_create(tn, index, est, a,
&act_vlan_ops, bind, true);
if (ret) {
- tcf_idr_cleanup(tn, parm->index);
+ tcf_idr_cleanup(tn, index);
return ret;
}
@@ -306,6 +308,14 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
return tcf_idr_search(tn, a, index);
}
+static size_t tcf_vlan_get_fill_size(const struct tc_action *act)
+{
+ return nla_total_size(sizeof(struct tc_vlan))
+ + nla_total_size(sizeof(u16)) /* TCA_VLAN_PUSH_VLAN_ID */
+ + nla_total_size(sizeof(u16)) /* TCA_VLAN_PUSH_VLAN_PROTOCOL */
+ + nla_total_size(sizeof(u8)); /* TCA_VLAN_PUSH_VLAN_PRIORITY */
+}
+
static struct tc_action_ops act_vlan_ops = {
.kind = "vlan",
.id = TCA_ID_VLAN,
@@ -315,6 +325,7 @@ static struct tc_action_ops act_vlan_ops = {
.init = tcf_vlan_init,
.cleanup = tcf_vlan_cleanup,
.walk = tcf_vlan_walker,
+ .get_fill_size = tcf_vlan_get_fill_size,
.lookup = tcf_vlan_search,
.size = sizeof(struct tcf_vlan),
};
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 638c1bc1ea1b..efd3cfb80a2a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -684,13 +684,15 @@ static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
.command = command,
.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
.net = dev_net(indr_dev->dev),
- .block_shared = tcf_block_shared(indr_dev->block),
+ .block_shared = tcf_block_non_null_shared(indr_dev->block),
};
INIT_LIST_HEAD(&bo.cb_list);
if (!indr_dev->block)
return;
+ bo.block = &indr_dev->block->flow_block;
+
indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
&bo);
tcf_block_setup(indr_dev->block, &bo);
@@ -775,6 +777,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
.command = command,
.binder_type = ei->binder_type,
.net = dev_net(dev),
+ .block = &block->flow_block,
.block_shared = tcf_block_shared(block),
.extack = extack,
};
@@ -810,6 +813,7 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
bo.net = dev_net(dev);
bo.command = command;
bo.binder_type = ei->binder_type;
+ bo.block = &block->flow_block;
bo.block_shared = tcf_block_shared(block);
bo.extack = extack;
INIT_LIST_HEAD(&bo.cb_list);
@@ -987,8 +991,8 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
return ERR_PTR(-ENOMEM);
}
mutex_init(&block->lock);
+ flow_block_init(&block->flow_block);
INIT_LIST_HEAD(&block->chain_list);
- INIT_LIST_HEAD(&block->cb_list);
INIT_LIST_HEAD(&block->owner_list);
INIT_LIST_HEAD(&block->chain0.filter_chain_list);
@@ -1514,7 +1518,7 @@ void tcf_block_put(struct tcf_block *block)
EXPORT_SYMBOL(tcf_block_put);
static int
-tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
void *cb_priv, bool add, bool offload_in_use,
struct netlink_ext_ack *extack)
{
@@ -1570,7 +1574,7 @@ static int tcf_block_bind(struct tcf_block *block,
i++;
}
- list_splice(&bo->cb_list, &block->cb_list);
+ list_splice(&bo->cb_list, &block->flow_block.cb_list);
return 0;
@@ -2152,6 +2156,9 @@ replay:
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held);
tfilter_put(tp, fh);
+ /* q pointer is NULL for shared blocks */
+ if (q)
+ q->flags &= ~TCQ_F_CAN_BYPASS;
}
errout:
@@ -3155,7 +3162,7 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
if (block->nooffloaddevcnt && err_stop)
return -EOPNOTSUPP;
- list_for_each_entry(block_cb, &block->cb_list, list) {
+ list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err) {
if (err_stop)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 691f71830134..3f7a9c02b70c 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -651,7 +651,7 @@ skip:
}
}
-static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 38d6e85693fc..054123742e32 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1800,7 +1800,7 @@ fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
return NULL;
}
-static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index a30d2f8feb32..455ea2793f9b 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -282,7 +282,7 @@ skip:
arg->count++;
}
-static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index be9e46c77e8b..8614088edd1b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1152,7 +1152,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
}
static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
- bool add, tc_setup_cb_t *cb, void *cb_priv,
+ bool add, flow_setup_cb_t *cb, void *cb_priv,
struct netlink_ext_ack *extack)
{
struct tc_cls_u32_offload cls_u32 = {};
@@ -1172,7 +1172,7 @@ static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
}
static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
- bool add, tc_setup_cb_t *cb, void *cb_priv,
+ bool add, flow_setup_cb_t *cb, void *cb_priv,
struct netlink_ext_ack *extack)
{
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
@@ -1213,7 +1213,7 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
return 0;
}
-static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 25ef172c23df..30169b3adbbb 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -71,10 +71,10 @@ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
struct Qdisc *sch = ctx;
struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
- if (skb)
+ if (skb) {
sch->qstats.backlog -= qdisc_pkt_len(skb);
-
- prefetch(&skb->end); /* we'll need skb_shinfo() */
+ prefetch(&skb->end); /* we'll need skb_shinfo() */
+ }
return skb;
}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index e2faf33d282b..d59fbcc745d1 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -596,8 +596,6 @@ static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid)
static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
u32 classid)
{
- /* we cannot bypass queue discipline anymore */
- sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 420bd8411677..68404a9d2ce4 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -824,8 +824,6 @@ static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
u32 classid)
{
- /* we cannot bypass queue discipline anymore */
- sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 388750ddc57a..e25d414ae12f 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -75,7 +75,7 @@ struct taprio_sched {
struct sched_gate_list __rcu *admin_sched;
struct hrtimer advance_timer;
struct list_head taprio_list;
- int txtime_delay;
+ u32 txtime_delay;
};
static ktime_t sched_base_time(const struct sched_gate_list *sched)
@@ -1113,7 +1113,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto unlock;
}
- q->txtime_delay = nla_get_s32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
+ q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
}
if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
@@ -1195,7 +1195,8 @@ unlock:
spin_unlock_bh(qdisc_lock(sch));
free_sched:
- kfree(new_admin);
+ if (new_admin)
+ call_rcu(&new_admin->rcu, taprio_free_sched_cb);
return err;
}
@@ -1430,7 +1431,7 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
goto options_error;
if (q->txtime_delay &&
- nla_put_s32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
+ nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
goto options_error;
if (oper && dump_schedule(skb, oper))
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index ed39396b9bba..36bd8a6e82df 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2582,8 +2582,7 @@ do_addr_param:
case SCTP_PARAM_STATE_COOKIE:
asoc->peer.cookie_len =
ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
- if (asoc->peer.cookie)
- kfree(asoc->peer.cookie);
+ kfree(asoc->peer.cookie);
asoc->peer.cookie = kmemdup(param.cookie->body, asoc->peer.cookie_len, gfp);
if (!asoc->peer.cookie)
retval = 0;
@@ -2648,8 +2647,7 @@ do_addr_param:
goto fall_through;
/* Save peer's random parameter */
- if (asoc->peer.peer_random)
- kfree(asoc->peer.peer_random);
+ kfree(asoc->peer.peer_random);
asoc->peer.peer_random = kmemdup(param.p,
ntohs(param.p->length), gfp);
if (!asoc->peer.peer_random) {
@@ -2663,8 +2661,7 @@ do_addr_param:
goto fall_through;
/* Save peer's HMAC list */
- if (asoc->peer.peer_hmacs)
- kfree(asoc->peer.peer_hmacs);
+ kfree(asoc->peer.peer_hmacs);
asoc->peer.peer_hmacs = kmemdup(param.p,
ntohs(param.p->length), gfp);
if (!asoc->peer.peer_hmacs) {
@@ -2680,8 +2677,7 @@ do_addr_param:
if (!ep->auth_enable)
goto fall_through;
- if (asoc->peer.peer_chunks)
- kfree(asoc->peer.peer_chunks);
+ kfree(asoc->peer.peer_chunks);
asoc->peer.peer_chunks = kmemdup(param.p,
ntohs(param.p->length), gfp);
if (!asoc->peer.peer_chunks)
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index a554d6d15d1b..1cf5bb5b73c4 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -546,7 +546,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands,
*/
if (net->sctp.pf_enable &&
(transport->state == SCTP_ACTIVE) &&
- (asoc->pf_retrans < transport->pathmaxrxt) &&
+ (transport->error_count < transport->pathmaxrxt) &&
(transport->error_count > asoc->pf_retrans)) {
sctp_assoc_control_transport(asoc, transport,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index aa80cda36581..9d1f83b10c0a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -985,7 +985,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
return -EINVAL;
kaddrs = memdup_user(addrs, addrs_size);
- if (unlikely(IS_ERR(kaddrs)))
+ if (IS_ERR(kaddrs))
return PTR_ERR(kaddrs);
/* Walk through the addrs buffer and count the number of addresses. */
@@ -1315,7 +1315,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
return -EINVAL;
kaddrs = memdup_user(addrs, addrs_size);
- if (unlikely(IS_ERR(kaddrs)))
+ if (IS_ERR(kaddrs))
return PTR_ERR(kaddrs);
/* Allow security module to validate connectx addresses. */
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 25946604af85..e83cdaa2ab76 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -316,6 +316,7 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
nstr_list[i] = htons(str_list[i]);
if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) {
+ kfree(nstr_list);
retval = -EAGAIN;
goto out;
}
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 9a19147902f1..1250751bca1b 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -25,10 +25,7 @@
#include <net/sctp/sctp.h>
#include <linux/sysctl.h>
-static int zero = 0;
-static int one = 1;
static int timer_max = 86400000; /* ms in one day */
-static int int_max = INT_MAX;
static int sack_timer_min = 1;
static int sack_timer_max = 500;
static int addr_scope_max = SCTP_SCOPE_POLICY_MAX;
@@ -92,7 +89,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &timer_max
},
{
@@ -101,7 +98,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_sctp_do_rto_min,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &init_net.sctp.rto_max
},
{
@@ -137,8 +134,8 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &int_max
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "cookie_preserve_enable",
@@ -160,7 +157,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &timer_max
},
{
@@ -178,7 +175,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &timer_max
},
{
@@ -187,8 +184,8 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &int_max
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "path_max_retrans",
@@ -196,8 +193,8 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &int_max
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "max_init_retransmits",
@@ -205,8 +202,8 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &int_max
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "pf_retrans",
@@ -214,8 +211,8 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &int_max
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "sndbuf_policy",
@@ -286,7 +283,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &addr_scope_max,
},
{
@@ -295,7 +292,7 @@ static struct ctl_table sctp_net_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
.extra2 = &rwnd_scale_max,
},
{
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 302e355f2ebc..5b932583e407 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -263,7 +263,7 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
/* Check if socket is already active */
rc = -EINVAL;
- if (sk->sk_state != SMC_INIT)
+ if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
goto out_rel;
smc->clcsock->sk->sk_reuse = sk->sk_reuse;
@@ -1390,7 +1390,8 @@ static int smc_listen(struct socket *sock, int backlog)
lock_sock(sk);
rc = -EINVAL;
- if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
+ if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
+ smc->connect_nonblock)
goto out;
rc = 0;
@@ -1518,7 +1519,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
if (msg->msg_flags & MSG_FASTOPEN) {
- if (sk->sk_state == SMC_INIT) {
+ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
smc_switch_to_fallback(smc);
smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
} else {
@@ -1732,14 +1733,18 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
}
break;
case TCP_NODELAY:
- if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+ if (sk->sk_state != SMC_INIT &&
+ sk->sk_state != SMC_LISTEN &&
+ sk->sk_state != SMC_CLOSED) {
if (val && !smc->use_fallback)
mod_delayed_work(system_wq, &smc->conn.tx_work,
0);
}
break;
case TCP_CORK:
- if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
+ if (sk->sk_state != SMC_INIT &&
+ sk->sk_state != SMC_LISTEN &&
+ sk->sk_state != SMC_CLOSED) {
if (!val && !smc->use_fallback)
mod_delayed_work(system_wq, &smc->conn.tx_work,
0);
diff --git a/net/socket.c b/net/socket.c
index 16449d6daeca..6a9ab7a8b1d2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -73,6 +73,7 @@
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
@@ -338,19 +339,22 @@ static const struct xattr_handler *sockfs_xattr_handlers[] = {
NULL
};
-static struct dentry *sockfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int sockfs_init_fs_context(struct fs_context *fc)
{
- return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
- sockfs_xattr_handlers,
- &sockfs_dentry_operations, SOCKFS_MAGIC);
+ struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->ops = &sockfs_ops;
+ ctx->dops = &sockfs_dentry_operations;
+ ctx->xattr = sockfs_xattr_handlers;
+ return 0;
}
static struct vfsmount *sock_mnt __read_mostly;
static struct file_system_type sock_fs_type = {
.name = "sockfs",
- .mount = sockfs_mount,
+ .init_fs_context = sockfs_init_fs_context,
.kill_sb = kill_anon_super,
};
@@ -2222,9 +2226,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
kmsg->msg_iocb = NULL;
- return import_iovec(save_addr ? READ : WRITE,
+ err = import_iovec(save_addr ? READ : WRITE,
msg.msg_iov, msg.msg_iovlen,
UIO_FASTIOV, iov, &kmsg->msg_iter);
+ return err < 0 ? err : 0;
}
static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
@@ -2326,6 +2331,13 @@ out_freeiov:
/*
* BSD sendmsg interface
*/
+long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
+ unsigned int flags)
+{
+ struct msghdr msg_sys;
+
+ return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
+}
long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
bool forbid_cmsg_compat)
@@ -2500,6 +2512,14 @@ out_freeiov:
* BSD recvmsg interface
*/
+long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
+ unsigned int flags)
+{
+ struct msghdr msg_sys;
+
+ return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
+}
+
long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
bool forbid_cmsg_compat)
{
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index aa307505ca54..3bcf985507be 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -35,7 +35,7 @@ config RPCSEC_GSS_KRB5
If unsure, say Y.
-config CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
+config SUNRPC_DISABLE_INSECURE_ENCTYPES
bool "Secure RPC: Disable insecure Kerberos encryption types"
depends on RPCSEC_GSS_KRB5
default n
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index c47d82622fd1..339e8c077c2d 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -31,25 +31,20 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RPCDBG_FACILITY RPCDBG_TRANS
#endif
+#define BC_MAX_SLOTS 64U
+
+unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt)
+{
+ return BC_MAX_SLOTS;
+}
+
/*
* Helper routines that track the number of preallocation elements
* on the transport.
*/
static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
{
- return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots);
-}
-
-static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
-{
- atomic_add(n, &xprt->bc_free_slots);
- xprt->bc_alloc_count += n;
-}
-
-static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
-{
- atomic_sub(n, &xprt->bc_free_slots);
- return xprt->bc_alloc_count -= n;
+ return xprt->bc_alloc_count < xprt->bc_alloc_max;
}
/*
@@ -145,6 +140,9 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
dprintk("RPC: setup backchannel transport\n");
+ if (min_reqs > BC_MAX_SLOTS)
+ min_reqs = BC_MAX_SLOTS;
+
/*
* We use a temporary list to keep track of the preallocated
* buffers. Once we're done building the list we splice it
@@ -172,7 +170,9 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
*/
spin_lock(&xprt->bc_pa_lock);
list_splice(&tmp_list, &xprt->bc_pa_list);
- xprt_inc_alloc_count(xprt, min_reqs);
+ xprt->bc_alloc_count += min_reqs;
+ xprt->bc_alloc_max += min_reqs;
+ atomic_add(min_reqs, &xprt->bc_slot_count);
spin_unlock(&xprt->bc_pa_lock);
dprintk("RPC: setup backchannel transport done\n");
@@ -220,11 +220,13 @@ void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs)
goto out;
spin_lock_bh(&xprt->bc_pa_lock);
- xprt_dec_alloc_count(xprt, max_reqs);
+ xprt->bc_alloc_max -= max_reqs;
list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
dprintk("RPC: req=%p\n", req);
list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
+ xprt->bc_alloc_count--;
+ atomic_dec(&xprt->bc_slot_count);
if (--max_reqs == 0)
break;
}
@@ -241,13 +243,14 @@ static struct rpc_rqst *xprt_get_bc_request(struct rpc_xprt *xprt, __be32 xid,
struct rpc_rqst *req = NULL;
dprintk("RPC: allocate a backchannel request\n");
- if (atomic_read(&xprt->bc_free_slots) <= 0)
- goto not_found;
if (list_empty(&xprt->bc_pa_list)) {
if (!new)
goto not_found;
+ if (atomic_read(&xprt->bc_slot_count) >= BC_MAX_SLOTS)
+ goto not_found;
list_add_tail(&new->rq_bc_pa_list, &xprt->bc_pa_list);
xprt->bc_alloc_count++;
+ atomic_inc(&xprt->bc_slot_count);
}
req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
rq_bc_pa_list);
@@ -291,6 +294,7 @@ void xprt_free_bc_rqst(struct rpc_rqst *req)
if (xprt_need_to_requeue(xprt)) {
list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
xprt->bc_alloc_count++;
+ atomic_inc(&xprt->bc_slot_count);
req = NULL;
}
spin_unlock_bh(&xprt->bc_pa_lock);
@@ -357,7 +361,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
spin_lock(&xprt->bc_pa_lock);
list_del(&req->rq_bc_pa_list);
- xprt_dec_alloc_count(xprt, 1);
+ xprt->bc_alloc_count--;
spin_unlock(&xprt->bc_pa_lock);
req->rq_private_buf.len = copied;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b03bfa055c08..d8679b6027e9 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -528,6 +528,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
.bc_xprt = args->bc_xprt,
};
char servername[48];
+ struct rpc_clnt *clnt;
+ int i;
if (args->bc_xprt) {
WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
@@ -590,7 +592,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
- return rpc_create_xprt(args, xprt);
+ clnt = rpc_create_xprt(args, xprt);
+ if (IS_ERR(clnt) || args->nconnect <= 1)
+ return clnt;
+
+ for (i = 0; i < args->nconnect - 1; i++) {
+ if (rpc_clnt_add_xprt(clnt, &xprtargs, NULL, NULL) < 0)
+ break;
+ }
+ return clnt;
}
EXPORT_SYMBOL_GPL(rpc_create);
@@ -968,13 +978,46 @@ out:
}
EXPORT_SYMBOL_GPL(rpc_bind_new_program);
+struct rpc_xprt *
+rpc_task_get_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+ struct rpc_xprt_switch *xps;
+
+ if (!xprt)
+ return NULL;
+ rcu_read_lock();
+ xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+ atomic_long_inc(&xps->xps_queuelen);
+ rcu_read_unlock();
+ atomic_long_inc(&xprt->queuelen);
+
+ return xprt;
+}
+
+static void
+rpc_task_release_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+ struct rpc_xprt_switch *xps;
+
+ atomic_long_dec(&xprt->queuelen);
+ rcu_read_lock();
+ xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+ atomic_long_dec(&xps->xps_queuelen);
+ rcu_read_unlock();
+
+ xprt_put(xprt);
+}
+
void rpc_task_release_transport(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
if (xprt) {
task->tk_xprt = NULL;
- xprt_put(xprt);
+ if (task->tk_client)
+ rpc_task_release_xprt(task->tk_client, xprt);
+ else
+ xprt_put(xprt);
}
}
EXPORT_SYMBOL_GPL(rpc_task_release_transport);
@@ -983,6 +1026,7 @@ void rpc_task_release_client(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
+ rpc_task_release_transport(task);
if (clnt != NULL) {
/* Remove from client task list */
spin_lock(&clnt->cl_lock);
@@ -992,14 +1036,34 @@ void rpc_task_release_client(struct rpc_task *task)
rpc_release_client(clnt);
}
- rpc_task_release_transport(task);
+}
+
+static struct rpc_xprt *
+rpc_task_get_first_xprt(struct rpc_clnt *clnt)
+{
+ struct rpc_xprt *xprt;
+
+ rcu_read_lock();
+ xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
+ rcu_read_unlock();
+ return rpc_task_get_xprt(clnt, xprt);
+}
+
+static struct rpc_xprt *
+rpc_task_get_next_xprt(struct rpc_clnt *clnt)
+{
+ return rpc_task_get_xprt(clnt, xprt_iter_get_next(&clnt->cl_xpi));
}
static
void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
{
- if (!task->tk_xprt)
- task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
+ if (task->tk_xprt)
+ return;
+ if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
+ task->tk_xprt = rpc_task_get_first_xprt(clnt);
+ else
+ task->tk_xprt = rpc_task_get_next_xprt(clnt);
}
static
@@ -1462,6 +1526,19 @@ size_t rpc_max_bc_payload(struct rpc_clnt *clnt)
}
EXPORT_SYMBOL_GPL(rpc_max_bc_payload);
+unsigned int rpc_num_bc_slots(struct rpc_clnt *clnt)
+{
+ struct rpc_xprt *xprt;
+ unsigned int ret;
+
+ rcu_read_lock();
+ xprt = rcu_dereference(clnt->cl_xprt);
+ ret = xprt->ops->bc_num_slots(xprt);
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_num_bc_slots);
+
/**
* rpc_force_rebind - force transport to check that remote port is unchanged
* @clnt: client to rebind
@@ -1788,6 +1865,7 @@ rpc_xdr_encode(struct rpc_task *task)
req->rq_snd_buf.head[0].iov_len = 0;
xdr_init_encode(&xdr, &req->rq_snd_buf,
req->rq_snd_buf.head[0].iov_base, req);
+ xdr_free_bvec(&req->rq_snd_buf);
if (rpc_encode_header(task, &xdr))
return;
@@ -1827,8 +1905,6 @@ call_encode(struct rpc_task *task)
rpc_call_rpcerror(task, task->tk_status);
}
return;
- } else {
- xprt_request_prepare(task->tk_rqstp);
}
/* Add task to reply queue before transmission to avoid races */
@@ -2696,6 +2772,10 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
return -ENOMEM;
data->xps = xprt_switch_get(xps);
data->xprt = xprt_get(xprt);
+ if (rpc_xprt_switch_has_addr(data->xps, (struct sockaddr *)&xprt->addr)) {
+ rpc_cb_add_xprt_release(data);
+ goto success;
+ }
task = rpc_call_null_helper(clnt, xprt, NULL,
RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS,
@@ -2703,6 +2783,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
if (IS_ERR(task))
return PTR_ERR(task);
rpc_put_task(task);
+success:
return 1;
}
EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 95ebd76b132d..fd9bca242724 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* debugfs interface for sunrpc
*
* (c) 2014 Jeff Layton <jlayton@primarydata.com>
@@ -11,7 +11,6 @@
#include "netns.h"
static struct dentry *topdir;
-static struct dentry *rpc_fault_dir;
static struct dentry *rpc_clnt_dir;
static struct dentry *rpc_xprt_dir;
@@ -118,16 +117,37 @@ static const struct file_operations tasks_fops = {
.release = tasks_release,
};
-void
-rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
+static int do_xprt_debugfs(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *numv)
{
int len;
char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */
- struct rpc_xprt *xprt;
+ char link[9]; /* enough for 8 hex digits + NULL */
+ int *nump = numv;
- /* Already registered? */
- if (clnt->cl_debugfs || !rpc_clnt_dir)
- return;
+ if (IS_ERR_OR_NULL(xprt->debugfs))
+ return 0;
+ len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
+ xprt->debugfs->d_name.name);
+ if (len > sizeof(name))
+ return -1;
+ if (*nump == 0)
+ strcpy(link, "xprt");
+ else {
+ len = snprintf(link, sizeof(link), "xprt%d", *nump);
+ if (len > sizeof(link))
+ return -1;
+ }
+ debugfs_create_symlink(link, clnt->cl_debugfs, name);
+ (*nump)++;
+ return 0;
+}
+
+void
+rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
+{
+ int len;
+ char name[9]; /* enough for 8 hex digits + NULL */
+ int xprtnum = 0;
len = snprintf(name, sizeof(name), "%x", clnt->cl_clid);
if (len >= sizeof(name))
@@ -135,35 +155,12 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
/* make the per-client dir */
clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir);
- if (!clnt->cl_debugfs)
- return;
/* make tasks file */
- if (!debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs,
- clnt, &tasks_fops))
- goto out_err;
-
- rcu_read_lock();
- xprt = rcu_dereference(clnt->cl_xprt);
- /* no "debugfs" dentry? Don't bother with the symlink. */
- if (IS_ERR_OR_NULL(xprt->debugfs)) {
- rcu_read_unlock();
- return;
- }
- len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
- xprt->debugfs->d_name.name);
- rcu_read_unlock();
-
- if (len >= sizeof(name))
- goto out_err;
+ debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs, clnt,
+ &tasks_fops);
- if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name))
- goto out_err;
-
- return;
-out_err:
- debugfs_remove_recursive(clnt->cl_debugfs);
- clnt->cl_debugfs = NULL;
+ rpc_clnt_iterate_for_each_xprt(clnt, do_xprt_debugfs, &xprtnum);
}
void
@@ -226,9 +223,6 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
static atomic_t cur_id;
char name[9]; /* 8 hex digits + NULL term */
- if (!rpc_xprt_dir)
- return;
-
id = (unsigned int)atomic_inc_return(&cur_id);
len = snprintf(name, sizeof(name), "%x", id);
@@ -237,15 +231,10 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
/* make the per-client dir */
xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir);
- if (!xprt->debugfs)
- return;
/* make tasks file */
- if (!debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs,
- xprt, &xprt_info_fops)) {
- debugfs_remove_recursive(xprt->debugfs);
- xprt->debugfs = NULL;
- }
+ debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs, xprt,
+ &xprt_info_fops);
atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
}
@@ -308,28 +297,11 @@ static const struct file_operations fault_disconnect_fops = {
.release = fault_release,
};
-static struct dentry *
-inject_fault_dir(struct dentry *topdir)
-{
- struct dentry *faultdir;
-
- faultdir = debugfs_create_dir("inject_fault", topdir);
- if (!faultdir)
- return NULL;
-
- if (!debugfs_create_file("disconnect", S_IFREG | 0400, faultdir,
- NULL, &fault_disconnect_fops))
- return NULL;
-
- return faultdir;
-}
-
void __exit
sunrpc_debugfs_exit(void)
{
debugfs_remove_recursive(topdir);
topdir = NULL;
- rpc_fault_dir = NULL;
rpc_clnt_dir = NULL;
rpc_xprt_dir = NULL;
}
@@ -337,26 +309,16 @@ sunrpc_debugfs_exit(void)
void __init
sunrpc_debugfs_init(void)
{
- topdir = debugfs_create_dir("sunrpc", NULL);
- if (!topdir)
- return;
+ struct dentry *rpc_fault_dir;
- rpc_fault_dir = inject_fault_dir(topdir);
- if (!rpc_fault_dir)
- goto out_remove;
+ topdir = debugfs_create_dir("sunrpc", NULL);
rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
- if (!rpc_clnt_dir)
- goto out_remove;
rpc_xprt_dir = debugfs_create_dir("rpc_xprt", topdir);
- if (!rpc_xprt_dir)
- goto out_remove;
- return;
-out_remove:
- debugfs_remove_recursive(topdir);
- topdir = NULL;
- rpc_fault_dir = NULL;
- rpc_clnt_dir = NULL;
+ rpc_fault_dir = debugfs_create_dir("inject_fault", topdir);
+
+ debugfs_create_file("disconnect", S_IFREG | 0400, rpc_fault_dir, NULL,
+ &fault_disconnect_fops);
}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 73bd62979fe7..748bac601e47 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -14,6 +14,7 @@
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
+#include <linux/fs_context.h>
#include <linux/namei.h>
#include <linux/fsnotify.h>
#include <linux/kernel.h>
@@ -1352,11 +1353,11 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry)
}
static int
-rpc_fill_super(struct super_block *sb, void *data, int silent)
+rpc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
struct dentry *root, *gssd_dentry;
- struct net *net = get_net(sb->s_fs_info);
+ struct net *net = sb->s_fs_info;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int err;
@@ -1413,12 +1414,29 @@ gssd_running(struct net *net)
}
EXPORT_SYMBOL_GPL(gssd_running);
-static struct dentry *
-rpc_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int rpc_fs_get_tree(struct fs_context *fc)
+{
+ fc->s_fs_info = get_net(fc->net_ns);
+ return vfs_get_super(fc, vfs_get_keyed_super, rpc_fill_super);
+}
+
+static void rpc_fs_free_fc(struct fs_context *fc)
{
- struct net *net = current->nsproxy->net_ns;
- return mount_ns(fs_type, flags, data, net, net->user_ns, rpc_fill_super);
+ if (fc->s_fs_info)
+ put_net(fc->s_fs_info);
+}
+
+static const struct fs_context_operations rpc_fs_context_ops = {
+ .free = rpc_fs_free_fc,
+ .get_tree = rpc_fs_get_tree,
+};
+
+static int rpc_init_fs_context(struct fs_context *fc)
+{
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(fc->net_ns->user_ns);
+ fc->ops = &rpc_fs_context_ops;
+ return 0;
}
static void rpc_kill_sb(struct super_block *sb)
@@ -1446,7 +1464,7 @@ out:
static struct file_system_type rpc_pipe_fs_type = {
.owner = THIS_MODULE,
.name = "rpc_pipefs",
- .mount = rpc_mount,
+ .init_fs_context = rpc_init_fs_context,
.kill_sb = rpc_kill_sb,
};
MODULE_ALIAS_FS("rpc_pipefs");
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index a2c114812717..1f275aba786f 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -23,6 +23,7 @@
#include <linux/sched/mm.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/metrics.h>
#include "sunrpc.h"
@@ -46,7 +47,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly;
static void rpc_async_schedule(struct work_struct *);
static void rpc_release_task(struct rpc_task *task);
-static void __rpc_queue_timer_fn(struct timer_list *t);
+static void __rpc_queue_timer_fn(struct work_struct *);
/*
* RPC tasks sit here while waiting for conditions to improve.
@@ -58,6 +59,7 @@ static struct rpc_wait_queue delay_queue;
*/
struct workqueue_struct *rpciod_workqueue __read_mostly;
struct workqueue_struct *xprtiod_workqueue __read_mostly;
+EXPORT_SYMBOL_GPL(xprtiod_workqueue);
unsigned long
rpc_task_timeout(const struct rpc_task *task)
@@ -87,13 +89,19 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
task->tk_timeout = 0;
list_del(&task->u.tk_wait.timer_list);
if (list_empty(&queue->timer_list.list))
- del_timer(&queue->timer_list.timer);
+ cancel_delayed_work(&queue->timer_list.dwork);
}
static void
rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires)
{
- timer_reduce(&queue->timer_list.timer, expires);
+ unsigned long now = jiffies;
+ queue->timer_list.expires = expires;
+ if (time_before_eq(expires, now))
+ expires = 0;
+ else
+ expires -= now;
+ mod_delayed_work(rpciod_workqueue, &queue->timer_list.dwork, expires);
}
/*
@@ -107,7 +115,8 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task,
task->tk_pid, jiffies_to_msecs(timeout - jiffies));
task->tk_timeout = timeout;
- rpc_set_queue_timer(queue, timeout);
+ if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires))
+ rpc_set_queue_timer(queue, timeout);
list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
}
@@ -250,7 +259,8 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
queue->maxpriority = nr_queues - 1;
rpc_reset_waitqueue_priority(queue);
queue->qlen = 0;
- timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
+ queue->timer_list.expires = 0;
+ INIT_DEFERRABLE_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn);
INIT_LIST_HEAD(&queue->timer_list.list);
rpc_assign_waitqueue_name(queue, qname);
}
@@ -269,7 +279,7 @@ EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
{
- del_timer_sync(&queue->timer_list.timer);
+ cancel_delayed_work_sync(&queue->timer_list.dwork);
}
EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
@@ -424,9 +434,9 @@ void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task,
/*
* Protect the queue operations.
*/
- spin_lock_bh(&q->lock);
+ spin_lock(&q->lock);
__rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority);
- spin_unlock_bh(&q->lock);
+ spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout);
@@ -442,9 +452,9 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
/*
* Protect the queue operations.
*/
- spin_lock_bh(&q->lock);
+ spin_lock(&q->lock);
__rpc_sleep_on_priority(q, task, task->tk_priority);
- spin_unlock_bh(&q->lock);
+ spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on);
@@ -458,9 +468,9 @@ void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
/*
* Protect the queue operations.
*/
- spin_lock_bh(&q->lock);
+ spin_lock(&q->lock);
__rpc_sleep_on_priority_timeout(q, task, timeout, priority);
- spin_unlock_bh(&q->lock);
+ spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout);
@@ -475,9 +485,9 @@ void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
/*
* Protect the queue operations.
*/
- spin_lock_bh(&q->lock);
+ spin_lock(&q->lock);
__rpc_sleep_on_priority(q, task, priority);
- spin_unlock_bh(&q->lock);
+ spin_unlock(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
@@ -555,9 +565,9 @@ void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
{
if (!RPC_IS_QUEUED(task))
return;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
}
/*
@@ -567,9 +577,9 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task
{
if (!RPC_IS_QUEUED(task))
return;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
rpc_wake_up_task_queue_locked(queue, task);
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
@@ -602,9 +612,9 @@ rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *queue,
{
if (!RPC_IS_QUEUED(task))
return;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
rpc_wake_up_task_queue_set_status_locked(queue, task, status);
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
}
/*
@@ -667,12 +677,12 @@ struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
dprintk("RPC: wake_up_first(%p \"%s\")\n",
queue, rpc_qname(queue));
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
task = __rpc_find_next_queued(queue);
if (task != NULL)
task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue,
task, func, data);
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
return task;
}
@@ -711,7 +721,7 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
{
struct list_head *head;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
while (!list_empty(head)) {
@@ -725,7 +735,7 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
break;
head--;
}
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up);
@@ -740,7 +750,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
struct list_head *head;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
while (!list_empty(head)) {
@@ -755,13 +765,15 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
break;
head--;
}
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_status);
-static void __rpc_queue_timer_fn(struct timer_list *t)
+static void __rpc_queue_timer_fn(struct work_struct *work)
{
- struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer);
+ struct rpc_wait_queue *queue = container_of(work,
+ struct rpc_wait_queue,
+ timer_list.dwork.work);
struct rpc_task *task, *n;
unsigned long expires, now, timeo;
@@ -832,6 +844,10 @@ rpc_reset_task_statistics(struct rpc_task *task)
void rpc_exit_task(struct rpc_task *task)
{
task->tk_action = NULL;
+ if (task->tk_ops->rpc_count_stats)
+ task->tk_ops->rpc_count_stats(task, task->tk_calldata);
+ else if (task->tk_client)
+ rpc_count_iostats(task, task->tk_client->cl_metrics);
if (task->tk_ops->rpc_call_done != NULL) {
task->tk_ops->rpc_call_done(task, task->tk_calldata);
if (task->tk_action != NULL) {
@@ -927,13 +943,13 @@ static void __rpc_execute(struct rpc_task *task)
* rpc_task pointer may still be dereferenced.
*/
queue = task->tk_waitqueue;
- spin_lock_bh(&queue->lock);
+ spin_lock(&queue->lock);
if (!RPC_IS_QUEUED(task)) {
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
continue;
}
rpc_clear_running(task);
- spin_unlock_bh(&queue->lock);
+ spin_unlock(&queue->lock);
if (task_is_async)
return;
@@ -1076,7 +1092,8 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
/* Initialize workqueue for async tasks */
task->tk_workqueue = task_setup_data->workqueue;
- task->tk_xprt = xprt_get(task_setup_data->rpc_xprt);
+ task->tk_xprt = rpc_task_get_xprt(task_setup_data->rpc_client,
+ xprt_get(task_setup_data->rpc_xprt));
task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred);
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 2b6dc7e5f74f..7c74197c2ecf 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -177,6 +177,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
execute = ktime_sub(now, task->tk_start);
op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
+ if (task->tk_status < 0)
+ op_metrics->om_error_status++;
spin_unlock(&op_metrics->om_lock);
@@ -219,13 +221,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b)
a->om_queue = ktime_add(a->om_queue, b->om_queue);
a->om_rtt = ktime_add(a->om_rtt, b->om_rtt);
a->om_execute = ktime_add(a->om_execute, b->om_execute);
+ a->om_error_status += b->om_error_status;
}
static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
int op, const struct rpc_procinfo *procs)
{
_print_name(seq, op, procs);
- seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n",
+ seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n",
stats->om_ops,
stats->om_ntrans,
stats->om_timeouts,
@@ -233,12 +236,20 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
stats->om_bytes_recv,
ktime_to_ms(stats->om_queue),
ktime_to_ms(stats->om_rtt),
- ktime_to_ms(stats->om_execute));
+ ktime_to_ms(stats->om_execute),
+ stats->om_error_status);
+}
+
+static int do_print_stats(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *seqv)
+{
+ struct seq_file *seq = seqv;
+
+ xprt->ops->print_stats(xprt, seq);
+ return 0;
}
void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)
{
- struct rpc_xprt *xprt;
unsigned int op, maxproc = clnt->cl_maxproc;
if (!clnt->cl_metrics)
@@ -248,11 +259,7 @@ void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)
seq_printf(seq, "p/v: %u/%u (%s)\n",
clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name);
- rcu_read_lock();
- xprt = rcu_dereference(clnt->cl_xprt);
- if (xprt)
- xprt->ops->print_stats(xprt, seq);
- rcu_read_unlock();
+ rpc_clnt_iterate_for_each_xprt(clnt, do_print_stats, seq);
seq_printf(seq, "\tper-op statistics\n");
for (op = 0; op < maxproc; op++) {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e15cb704453e..220b79988000 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1595,7 +1595,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
/* Parse and execute the bc call */
proc_error = svc_process_common(rqstp, argv, resv);
- atomic_inc(&req->rq_xprt->bc_free_slots);
+ atomic_dec(&req->rq_xprt->bc_slot_count);
if (!proc_error) {
/* Processing error: drop the request */
xprt_free_bc_request(req);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f6c82b1651e7..783748dc5e6f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -302,9 +302,9 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
if (test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == task)
return 1;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
retval = xprt->ops->reserve_xprt(xprt, task);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
return retval;
}
@@ -381,9 +381,9 @@ static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *ta
{
if (xprt->snd_task != task)
return;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
/*
@@ -435,9 +435,9 @@ xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
if (req->rq_cong)
return true;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
ret = __xprt_get_cong(xprt, req) != 0;
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
return ret;
}
EXPORT_SYMBOL_GPL(xprt_request_get_cong);
@@ -464,9 +464,9 @@ static void
xprt_clear_congestion_window_wait(struct rpc_xprt *xprt)
{
if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state)) {
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
__xprt_lock_write_next_cong(xprt);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
}
@@ -563,9 +563,9 @@ bool xprt_write_space(struct rpc_xprt *xprt)
if (!test_bit(XPRT_WRITE_SPACE, &xprt->state))
return false;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
ret = xprt_clear_write_space_locked(xprt);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
return ret;
}
EXPORT_SYMBOL_GPL(xprt_write_space);
@@ -634,9 +634,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
req->rq_retries = 0;
xprt_reset_majortimeo(req);
/* Reset the RTT counters == "slow start" */
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
status = -ETIMEDOUT;
}
@@ -668,11 +668,11 @@ static void xprt_autoclose(struct work_struct *work)
void xprt_disconnect_done(struct rpc_xprt *xprt)
{
dprintk("RPC: disconnected transport %p\n", xprt);
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt_clear_connected(xprt);
xprt_clear_write_space_locked(xprt);
xprt_wake_pending_tasks(xprt, -ENOTCONN);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_disconnect_done);
@@ -684,7 +684,7 @@ EXPORT_SYMBOL_GPL(xprt_disconnect_done);
void xprt_force_disconnect(struct rpc_xprt *xprt)
{
/* Don't race with the test_bit() in xprt_clear_locked() */
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
set_bit(XPRT_CLOSE_WAIT, &xprt->state);
/* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
@@ -692,7 +692,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
else if (xprt->snd_task)
rpc_wake_up_queued_task_set_status(&xprt->pending,
xprt->snd_task, -ENOTCONN);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_force_disconnect);
@@ -726,7 +726,7 @@ xprt_request_retransmit_after_disconnect(struct rpc_task *task)
void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
{
/* Don't race with the test_bit() in xprt_clear_locked() */
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
if (cookie != xprt->connect_cookie)
goto out;
if (test_bit(XPRT_CLOSING, &xprt->state))
@@ -737,7 +737,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
xprt_wake_pending_tasks(xprt, -EAGAIN);
out:
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
static bool
@@ -750,6 +750,7 @@ static void
xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
__must_hold(&xprt->transport_lock)
{
+ xprt->last_used = jiffies;
if (RB_EMPTY_ROOT(&xprt->recv_queue) && xprt_has_timer(xprt))
mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
}
@@ -759,18 +760,13 @@ xprt_init_autodisconnect(struct timer_list *t)
{
struct rpc_xprt *xprt = from_timer(xprt, t, timer);
- spin_lock(&xprt->transport_lock);
if (!RB_EMPTY_ROOT(&xprt->recv_queue))
- goto out_abort;
+ return;
/* Reset xprt->last_used to avoid connect/autodisconnect cycling */
xprt->last_used = jiffies;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
- goto out_abort;
- spin_unlock(&xprt->transport_lock);
+ return;
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
- return;
-out_abort:
- spin_unlock(&xprt->transport_lock);
}
bool xprt_lock_connect(struct rpc_xprt *xprt,
@@ -779,7 +775,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
{
bool ret = false;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
if (!test_bit(XPRT_LOCKED, &xprt->state))
goto out;
if (xprt->snd_task != task)
@@ -787,13 +783,13 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
xprt->snd_task = cookie;
ret = true;
out:
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
return ret;
}
void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
{
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
if (xprt->snd_task != cookie)
goto out;
if (!test_bit(XPRT_LOCKED, &xprt->state))
@@ -802,7 +798,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
xprt->ops->release_xprt(xprt, NULL);
xprt_schedule_autodisconnect(xprt);
out:
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
wake_up_bit(&xprt->state, XPRT_LOCKED);
}
@@ -850,6 +846,38 @@ void xprt_connect(struct rpc_task *task)
xprt_release_write(xprt, task);
}
+/**
+ * xprt_reconnect_delay - compute the wait before scheduling a connect
+ * @xprt: transport instance
+ *
+ */
+unsigned long xprt_reconnect_delay(const struct rpc_xprt *xprt)
+{
+ unsigned long start, now = jiffies;
+
+ start = xprt->stat.connect_start + xprt->reestablish_timeout;
+ if (time_after(start, now))
+ return start - now;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xprt_reconnect_delay);
+
+/**
+ * xprt_reconnect_backoff - compute the new re-establish timeout
+ * @xprt: transport instance
+ * @init_to: initial reestablish timeout
+ *
+ */
+void xprt_reconnect_backoff(struct rpc_xprt *xprt, unsigned long init_to)
+{
+ xprt->reestablish_timeout <<= 1;
+ if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
+ xprt->reestablish_timeout = xprt->max_reconnect_timeout;
+ if (xprt->reestablish_timeout < init_to)
+ xprt->reestablish_timeout = init_to;
+}
+EXPORT_SYMBOL_GPL(xprt_reconnect_backoff);
+
enum xprt_xid_rb_cmp {
XID_RB_EQUAL,
XID_RB_LEFT,
@@ -1013,6 +1041,8 @@ xprt_request_enqueue_receive(struct rpc_task *task)
if (!xprt_request_need_enqueue_receive(task, req))
return;
+
+ xprt_request_prepare(task->tk_rqstp);
spin_lock(&xprt->queue_lock);
/* Update the softirq receive buffer */
@@ -1412,14 +1442,14 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
xprt_inject_disconnect(xprt);
task->tk_flags |= RPC_TASK_SENT;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt->stat.sends++;
xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
xprt->stat.bklog_u += xprt->backlog.qlen;
xprt->stat.sending_u += xprt->sending.qlen;
xprt->stat.pending_u += xprt->pending.qlen;
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
req->rq_connect_cookie = connect_cookie;
out_dequeue:
@@ -1765,18 +1795,13 @@ void xprt_release(struct rpc_task *task)
}
xprt = req->rq_xprt;
- if (task->tk_ops->rpc_count_stats != NULL)
- task->tk_ops->rpc_count_stats(task, task->tk_calldata);
- else if (task->tk_client)
- rpc_count_iostats(task, task->tk_client->cl_metrics);
xprt_request_dequeue_all(task, req);
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
if (xprt->ops->release_request)
xprt->ops->release_request(task);
- xprt->last_used = jiffies;
xprt_schedule_autodisconnect(xprt);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(task);
xprt_inject_disconnect(xprt);
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 8394124126f8..78c075a68c04 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -19,7 +19,7 @@
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/xprtmultipath.h>
-typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head,
+typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps,
const struct rpc_xprt *cur);
static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular;
@@ -36,6 +36,7 @@ static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps,
if (xps->xps_nxprts == 0)
xps->xps_net = xprt->xprt_net;
xps->xps_nxprts++;
+ xps->xps_nactive++;
}
/**
@@ -51,8 +52,7 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
if (xprt == NULL)
return;
spin_lock(&xps->xps_lock);
- if ((xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) &&
- !rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr))
+ if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
xprt_switch_add_xprt_locked(xps, xprt);
spin_unlock(&xps->xps_lock);
}
@@ -62,6 +62,7 @@ static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
{
if (unlikely(xprt == NULL))
return;
+ xps->xps_nactive--;
xps->xps_nxprts--;
if (xps->xps_nxprts == 0)
xps->xps_net = NULL;
@@ -102,7 +103,9 @@ struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
if (xps != NULL) {
spin_lock_init(&xps->xps_lock);
kref_init(&xps->xps_kref);
- xps->xps_nxprts = 0;
+ xps->xps_nxprts = xps->xps_nactive = 0;
+ atomic_long_set(&xps->xps_queuelen, 0);
+ xps->xps_net = NULL;
INIT_LIST_HEAD(&xps->xps_xprt_list);
xps->xps_iter_ops = &rpc_xprt_iter_singular;
xprt_switch_add_xprt_locked(xps, xprt);
@@ -193,9 +196,21 @@ void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi)
}
static
+bool xprt_is_active(const struct rpc_xprt *xprt)
+{
+ return kref_read(&xprt->kref) != 0;
+}
+
+static
struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head)
{
- return list_first_or_null_rcu(head, struct rpc_xprt, xprt_switch);
+ struct rpc_xprt *pos;
+
+ list_for_each_entry_rcu(pos, head, xprt_switch) {
+ if (xprt_is_active(pos))
+ return pos;
+ }
+ return NULL;
}
static
@@ -213,9 +228,12 @@ struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
const struct rpc_xprt *cur)
{
struct rpc_xprt *pos;
+ bool found = false;
list_for_each_entry_rcu(pos, head, xprt_switch) {
if (cur == pos)
+ found = true;
+ if (found && xprt_is_active(pos))
return pos;
}
return NULL;
@@ -260,9 +278,12 @@ struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
const struct rpc_xprt *cur)
{
struct rpc_xprt *pos, *prev = NULL;
+ bool found = false;
list_for_each_entry_rcu(pos, head, xprt_switch) {
if (cur == prev)
+ found = true;
+ if (found && xprt_is_active(pos))
return pos;
prev = pos;
}
@@ -270,22 +291,15 @@ struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
}
static
-struct rpc_xprt *xprt_switch_set_next_cursor(struct list_head *head,
+struct rpc_xprt *xprt_switch_set_next_cursor(struct rpc_xprt_switch *xps,
struct rpc_xprt **cursor,
xprt_switch_find_xprt_t find_next)
{
- struct rpc_xprt *cur, *pos, *old;
+ struct rpc_xprt *pos, *old;
- cur = READ_ONCE(*cursor);
- for (;;) {
- old = cur;
- pos = find_next(head, old);
- if (pos == NULL)
- break;
- cur = cmpxchg_relaxed(cursor, old, pos);
- if (cur == old)
- break;
- }
+ old = smp_load_acquire(cursor);
+ pos = find_next(xps, old);
+ smp_store_release(cursor, pos);
return pos;
}
@@ -297,13 +311,11 @@ struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi,
if (xps == NULL)
return NULL;
- return xprt_switch_set_next_cursor(&xps->xps_xprt_list,
- &xpi->xpi_cursor,
- find_next);
+ return xprt_switch_set_next_cursor(xps, &xpi->xpi_cursor, find_next);
}
static
-struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head,
+struct rpc_xprt *__xprt_switch_find_next_entry_roundrobin(struct list_head *head,
const struct rpc_xprt *cur)
{
struct rpc_xprt *ret;
@@ -315,6 +327,31 @@ struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head,
}
static
+struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct rpc_xprt_switch *xps,
+ const struct rpc_xprt *cur)
+{
+ struct list_head *head = &xps->xps_xprt_list;
+ struct rpc_xprt *xprt;
+ unsigned int nactive;
+
+ for (;;) {
+ unsigned long xprt_queuelen, xps_queuelen;
+
+ xprt = __xprt_switch_find_next_entry_roundrobin(head, cur);
+ if (!xprt)
+ break;
+ xprt_queuelen = atomic_long_read(&xprt->queuelen);
+ xps_queuelen = atomic_long_read(&xps->xps_queuelen);
+ nactive = READ_ONCE(xps->xps_nactive);
+ /* Exit loop if xprt_queuelen <= average queue length */
+ if (xprt_queuelen * nactive <= xps_queuelen)
+ break;
+ cur = xprt;
+ }
+ return xprt;
+}
+
+static
struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi)
{
return xprt_iter_next_entry_multiple(xpi,
@@ -322,9 +359,17 @@ struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi)
}
static
+struct rpc_xprt *xprt_switch_find_next_entry_all(struct rpc_xprt_switch *xps,
+ const struct rpc_xprt *cur)
+{
+ return xprt_switch_find_next_entry(&xps->xps_xprt_list, cur);
+}
+
+static
struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi)
{
- return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry);
+ return xprt_iter_next_entry_multiple(xpi,
+ xprt_switch_find_next_entry_all);
}
/*
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index ce986591f213..59e624b1d7a0 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -52,6 +52,13 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
return maxmsg - RPCRDMA_HDRLEN_MIN;
}
+unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *xprt)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+ return r_xprt->rx_buf.rb_bc_srv_max_requests;
+}
+
static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 794ba4ca0994..0b6dad7580a1 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -144,6 +144,26 @@ frwr_mr_recycle_worker(struct work_struct *work)
frwr_release_mr(mr);
}
+/* frwr_reset - Place MRs back on the free list
+ * @req: request to reset
+ *
+ * Used after a failed marshal. For FRWR, this means the MRs
+ * don't have to be fully released and recreated.
+ *
+ * NB: This is safe only as long as none of @req's MRs are
+ * involved with an ongoing asynchronous FAST_REG or LOCAL_INV
+ * Work Request.
+ */
+void frwr_reset(struct rpcrdma_req *req)
+{
+ while (!list_empty(&req->rl_registered)) {
+ struct rpcrdma_mr *mr;
+
+ mr = rpcrdma_mr_pop(&req->rl_registered);
+ rpcrdma_mr_unmap_and_put(mr);
+ }
+}
+
/**
* frwr_init_mr - Initialize one MR
* @ia: interface adapter
@@ -168,7 +188,6 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
goto out_list_err;
mr->frwr.fr_mr = frmr;
- mr->frwr.fr_state = FRWR_IS_INVALID;
mr->mr_dir = DMA_NONE;
INIT_LIST_HEAD(&mr->mr_list);
INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
@@ -298,65 +317,6 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
}
/**
- * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
- * @cq: completion queue (ignored)
- * @wc: completed WR
- *
- */
-static void
-frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
-
- /* WARNING: Only wr_cqe and status are reliable at this point */
- if (wc->status != IB_WC_SUCCESS)
- frwr->fr_state = FRWR_FLUSHED_FR;
- trace_xprtrdma_wc_fastreg(wc, frwr);
-}
-
-/**
- * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
- * @cq: completion queue (ignored)
- * @wc: completed WR
- *
- */
-static void
-frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
- fr_cqe);
-
- /* WARNING: Only wr_cqe and status are reliable at this point */
- if (wc->status != IB_WC_SUCCESS)
- frwr->fr_state = FRWR_FLUSHED_LI;
- trace_xprtrdma_wc_li(wc, frwr);
-}
-
-/**
- * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC
- * @cq: completion queue (ignored)
- * @wc: completed WR
- *
- * Awaken anyone waiting for an MR to finish being fenced.
- */
-static void
-frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
- fr_cqe);
-
- /* WARNING: Only wr_cqe and status are reliable at this point */
- if (wc->status != IB_WC_SUCCESS)
- frwr->fr_state = FRWR_FLUSHED_LI;
- trace_xprtrdma_wc_li_wake(wc, frwr);
- complete(&frwr->fr_linv_done);
-}
-
-/**
* frwr_map - Register a memory region
* @r_xprt: controlling transport
* @seg: memory region co-ordinates
@@ -378,23 +338,15 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
- struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr;
int i, n;
u8 key;
- mr = NULL;
- do {
- if (mr)
- rpcrdma_mr_recycle(mr);
- mr = rpcrdma_mr_get(r_xprt);
- if (!mr)
- return ERR_PTR(-EAGAIN);
- } while (mr->frwr.fr_state != FRWR_IS_INVALID);
- frwr = &mr->frwr;
- frwr->fr_state = FRWR_IS_VALID;
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
+ goto out_getmr_err;
if (nsegs > ia->ri_max_frwr_depth)
nsegs = ia->ri_max_frwr_depth;
@@ -423,7 +375,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
if (!mr->mr_nents)
goto out_dmamap_err;
- ibmr = frwr->fr_mr;
+ ibmr = mr->frwr.fr_mr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
if (unlikely(n != mr->mr_nents))
goto out_mapmr_err;
@@ -433,7 +385,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
key = (u8)(ibmr->rkey & 0x000000FF);
ib_update_fast_reg_key(ibmr, ++key);
- reg_wr = &frwr->fr_regwr;
+ reg_wr = &mr->frwr.fr_regwr;
reg_wr->mr = ibmr;
reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
@@ -448,6 +400,10 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
*out = mr;
return seg;
+out_getmr_err:
+ xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
+ return ERR_PTR(-EAGAIN);
+
out_dmamap_err:
mr->mr_dir = DMA_NONE;
trace_xprtrdma_frwr_sgerr(mr, i);
@@ -461,6 +417,23 @@ out_mapmr_err:
}
/**
+ * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
+ * @cq: completion queue (ignored)
+ * @wc: completed WR
+ *
+ */
+static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+
+ /* WARNING: Only wr_cqe and status are reliable at this point */
+ trace_xprtrdma_wc_fastreg(wc, frwr);
+ /* The MR will get recycled when the associated req is retransmitted */
+}
+
+/**
* frwr_send - post Send WR containing the RPC Call message
* @ia: interface adapter
* @req: Prepared RPC Call
@@ -512,31 +485,75 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del_init(&mr->mr_list);
trace_xprtrdma_mr_remoteinv(mr);
- mr->frwr.fr_state = FRWR_IS_INVALID;
rpcrdma_mr_unmap_and_put(mr);
break; /* only one invalidated MR per RPC */
}
}
+static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
+{
+ if (wc->status != IB_WC_SUCCESS)
+ rpcrdma_mr_recycle(mr);
+ else
+ rpcrdma_mr_unmap_and_put(mr);
+}
+
/**
- * frwr_unmap_sync - invalidate memory regions that were registered for @req
- * @r_xprt: controlling transport
- * @mrs: list of MRs to process
+ * frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC
+ * @cq: completion queue (ignored)
+ * @wc: completed WR
+ *
+ */
+static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+
+ /* WARNING: Only wr_cqe and status are reliable at this point */
+ trace_xprtrdma_wc_li(wc, frwr);
+ __frwr_release_mr(wc, mr);
+}
+
+/**
+ * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC
+ * @cq: completion queue (ignored)
+ * @wc: completed WR
*
- * Sleeps until it is safe for the host CPU to access the
- * previously mapped memory regions.
+ * Awaken anyone waiting for an MR to finish being fenced.
+ */
+static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+
+ /* WARNING: Only wr_cqe and status are reliable at this point */
+ trace_xprtrdma_wc_li_wake(wc, frwr);
+ complete(&frwr->fr_linv_done);
+ __frwr_release_mr(wc, mr);
+}
+
+/**
+ * frwr_unmap_sync - invalidate memory regions that were registered for @req
+ * @r_xprt: controlling transport instance
+ * @req: rpcrdma_req with a non-empty list of MRs to process
*
- * Caller ensures that @mrs is not empty before the call. This
- * function empties the list.
+ * Sleeps until it is safe for the host CPU to access the previously mapped
+ * memory regions. This guarantees that registered MRs are properly fenced
+ * from the server before the RPC consumer accesses the data in them. It
+ * also ensures proper Send flow control: waking the next RPC waits until
+ * this RPC has relinquished all its Send Queue entries.
*/
-void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
+void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *first, **prev, *last;
const struct ib_send_wr *bad_wr;
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
- int count, rc;
+ int rc;
/* ORDER: Invalidate all of the MRs first
*
@@ -544,33 +561,32 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
* a single ib_post_send() call.
*/
frwr = NULL;
- count = 0;
prev = &first;
- list_for_each_entry(mr, mrs, mr_list) {
- mr->frwr.fr_state = FRWR_IS_INVALID;
+ while (!list_empty(&req->rl_registered)) {
+ mr = rpcrdma_mr_pop(&req->rl_registered);
- frwr = &mr->frwr;
trace_xprtrdma_mr_localinv(mr);
+ r_xprt->rx_stats.local_inv_needed++;
+ frwr = &mr->frwr;
frwr->fr_cqe.done = frwr_wc_localinv;
last = &frwr->fr_invwr;
- memset(last, 0, sizeof(*last));
+ last->next = NULL;
last->wr_cqe = &frwr->fr_cqe;
+ last->sg_list = NULL;
+ last->num_sge = 0;
last->opcode = IB_WR_LOCAL_INV;
+ last->send_flags = IB_SEND_SIGNALED;
last->ex.invalidate_rkey = mr->mr_handle;
- count++;
*prev = last;
prev = &last->next;
}
- if (!frwr)
- goto unmap;
/* Strong send queue ordering guarantees that when the
* last WR in the chain completes, all WRs in the chain
* are complete.
*/
- last->send_flags = IB_SEND_SIGNALED;
frwr->fr_cqe.done = frwr_wc_localinv_wake;
reinit_completion(&frwr->fr_linv_done);
@@ -578,37 +594,126 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
* replaces the QP. The RPC reply handler won't call us
* unless ri_id->qp is a valid pointer.
*/
- r_xprt->rx_stats.local_inv_needed++;
bad_wr = NULL;
- rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
+ rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
+ trace_xprtrdma_post_send(req, rc);
+
+ /* The final LOCAL_INV WR in the chain is supposed to
+ * do the wake. If it was never posted, the wake will
+ * not happen, so don't wait in that case.
+ */
if (bad_wr != first)
wait_for_completion(&frwr->fr_linv_done);
- if (rc)
- goto out_release;
+ if (!rc)
+ return;
- /* ORDER: Now DMA unmap all of the MRs, and return
- * them to the free MR list.
+ /* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
-unmap:
- while (!list_empty(mrs)) {
- mr = rpcrdma_mr_pop(mrs);
- rpcrdma_mr_unmap_and_put(mr);
+ while (bad_wr) {
+ frwr = container_of(bad_wr, struct rpcrdma_frwr,
+ fr_invwr);
+ mr = container_of(frwr, struct rpcrdma_mr, frwr);
+ bad_wr = bad_wr->next;
+
+ list_del_init(&mr->mr_list);
+ rpcrdma_mr_recycle(mr);
}
- return;
+}
-out_release:
- pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
+/**
+ * frwr_wc_localinv_done - Invoked by RDMA provider for a signaled LOCAL_INV WC
+ * @cq: completion queue (ignored)
+ * @wc: completed WR
+ *
+ */
+static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
- /* Unmap and release the MRs in the LOCAL_INV WRs that did not
- * get posted.
+ /* WARNING: Only wr_cqe and status are reliable at this point */
+ trace_xprtrdma_wc_li_done(wc, frwr);
+ rpcrdma_complete_rqst(frwr->fr_req->rl_reply);
+ __frwr_release_mr(wc, mr);
+}
+
+/**
+ * frwr_unmap_async - invalidate memory regions that were registered for @req
+ * @r_xprt: controlling transport instance
+ * @req: rpcrdma_req with a non-empty list of MRs to process
+ *
+ * This guarantees that registered MRs are properly fenced from the
+ * server before the RPC consumer accesses the data in them. It also
+ * ensures proper Send flow control: waking the next RPC waits until
+ * this RPC has relinquished all its Send Queue entries.
+ */
+void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+{
+ struct ib_send_wr *first, *last, **prev;
+ const struct ib_send_wr *bad_wr;
+ struct rpcrdma_frwr *frwr;
+ struct rpcrdma_mr *mr;
+ int rc;
+
+ /* Chain the LOCAL_INV Work Requests and post them with
+ * a single ib_post_send() call.
+ */
+ frwr = NULL;
+ prev = &first;
+ while (!list_empty(&req->rl_registered)) {
+ mr = rpcrdma_mr_pop(&req->rl_registered);
+
+ trace_xprtrdma_mr_localinv(mr);
+ r_xprt->rx_stats.local_inv_needed++;
+
+ frwr = &mr->frwr;
+ frwr->fr_cqe.done = frwr_wc_localinv;
+ frwr->fr_req = req;
+ last = &frwr->fr_invwr;
+ last->next = NULL;
+ last->wr_cqe = &frwr->fr_cqe;
+ last->sg_list = NULL;
+ last->num_sge = 0;
+ last->opcode = IB_WR_LOCAL_INV;
+ last->send_flags = IB_SEND_SIGNALED;
+ last->ex.invalidate_rkey = mr->mr_handle;
+
+ *prev = last;
+ prev = &last->next;
+ }
+
+ /* Strong send queue ordering guarantees that when the
+ * last WR in the chain completes, all WRs in the chain
+ * are complete. The last completion will wake up the
+ * RPC waiter.
+ */
+ frwr->fr_cqe.done = frwr_wc_localinv_done;
+
+ /* Transport disconnect drains the receive CQ before it
+ * replaces the QP. The RPC reply handler won't call us
+ * unless ri_id->qp is a valid pointer.
+ */
+ bad_wr = NULL;
+ rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
+ trace_xprtrdma_post_send(req, rc);
+ if (!rc)
+ return;
+
+ /* Recycle MRs in the LOCAL_INV chain that did not get posted.
*/
while (bad_wr) {
- frwr = container_of(bad_wr, struct rpcrdma_frwr,
- fr_invwr);
+ frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
mr = container_of(frwr, struct rpcrdma_mr, frwr);
bad_wr = bad_wr->next;
- list_del_init(&mr->mr_list);
rpcrdma_mr_recycle(mr);
}
+
+ /* The final LOCAL_INV WR in the chain is supposed to
+ * do the wake. If it was never posted, the wake will
+ * not happen, so wake here in that case.
+ */
+ rpcrdma_complete_rqst(req->rl_reply);
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 85115a2e2639..4345e6912392 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -366,6 +366,9 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
unsigned int pos;
int nsegs;
+ if (rtype == rpcrdma_noch)
+ goto done;
+
pos = rqst->rq_snd_buf.head[0].iov_len;
if (rtype == rpcrdma_areadch)
pos = 0;
@@ -389,7 +392,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nsegs -= mr->mr_nents;
} while (nsegs);
- return 0;
+done:
+ return encode_item_not_present(xdr);
}
/* Register and XDR encode the Write list. Supports encoding a list
@@ -417,6 +421,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
int nsegs, nchunks;
__be32 *segcount;
+ if (wtype != rpcrdma_writech)
+ goto done;
+
seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
rqst->rq_rcv_buf.head[0].iov_len,
@@ -451,7 +458,8 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
/* Update count of segments in this Write chunk */
*segcount = cpu_to_be32(nchunks);
- return 0;
+done:
+ return encode_item_not_present(xdr);
}
/* Register and XDR encode the Reply chunk. Supports encoding an array
@@ -476,6 +484,9 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
int nsegs, nchunks;
__be32 *segcount;
+ if (wtype != rpcrdma_replych)
+ return encode_item_not_present(xdr);
+
seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
if (nsegs < 0)
@@ -511,6 +522,16 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
return 0;
}
+static void rpcrdma_sendctx_done(struct kref *kref)
+{
+ struct rpcrdma_req *req =
+ container_of(kref, struct rpcrdma_req, rl_kref);
+ struct rpcrdma_rep *rep = req->rl_reply;
+
+ rpcrdma_complete_rqst(rep);
+ rep->rr_rxprt->rx_stats.reply_waits_for_send++;
+}
+
/**
* rpcrdma_sendctx_unmap - DMA-unmap Send buffer
* @sc: sendctx containing SGEs to unmap
@@ -520,6 +541,9 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
{
struct ib_sge *sge;
+ if (!sc->sc_unmap_count)
+ return;
+
/* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so
* they can be cheaply re-used.
@@ -529,9 +553,7 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length,
DMA_TO_DEVICE);
- if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES,
- &sc->sc_req->rl_flags))
- wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
+ kref_put(&sc->sc_req->rl_kref, rpcrdma_sendctx_done);
}
/* Prepare an SGE for the RPC-over-RDMA transport header.
@@ -666,7 +688,7 @@ map_tail:
out:
sc->sc_wr.num_sge += sge_no;
if (sc->sc_unmap_count)
- __set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+ kref_get(&req->rl_kref);
return true;
out_regbuf:
@@ -699,22 +721,28 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, u32 hdrlen,
struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
+ int ret;
+
+ ret = -EAGAIN;
req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
if (!req->rl_sendctx)
- return -EAGAIN;
+ goto err;
req->rl_sendctx->sc_wr.num_sge = 0;
req->rl_sendctx->sc_unmap_count = 0;
req->rl_sendctx->sc_req = req;
- __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+ kref_init(&req->rl_kref);
+ ret = -EIO;
if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
- return -EIO;
-
+ goto err;
if (rtype != rpcrdma_areadch)
if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype))
- return -EIO;
-
+ goto err;
return 0;
+
+err:
+ trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
+ return ret;
}
/**
@@ -842,50 +870,28 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
* send a Call message with a Position Zero Read chunk and a
* regular Read chunk at the same time.
*/
- if (rtype != rpcrdma_noch) {
- ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
- if (ret)
- goto out_err;
- }
- ret = encode_item_not_present(xdr);
+ ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype);
if (ret)
goto out_err;
-
- if (wtype == rpcrdma_writech) {
- ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
- if (ret)
- goto out_err;
- }
- ret = encode_item_not_present(xdr);
+ ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype);
if (ret)
goto out_err;
-
- if (wtype != rpcrdma_replych)
- ret = encode_item_not_present(xdr);
- else
- ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
+ ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype);
if (ret)
goto out_err;
- trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
-
- ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
+ ret = rpcrdma_prepare_send_sges(r_xprt, req, req->rl_hdrbuf.len,
&rqst->rq_snd_buf, rtype);
if (ret)
goto out_err;
+
+ trace_xprtrdma_marshal(req, rtype, wtype);
return 0;
out_err:
trace_xprtrdma_marshal_failed(rqst, ret);
- switch (ret) {
- case -EAGAIN:
- xprt_wait_for_buffer_space(rqst->rq_xprt);
- break;
- case -ENOBUFS:
- break;
- default:
- r_xprt->rx_stats.failed_marshal_count++;
- }
+ r_xprt->rx_stats.failed_marshal_count++;
+ frwr_reset(req);
return ret;
}
@@ -1269,51 +1275,17 @@ out_badheader:
goto out;
}
-void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
-{
- /* Invalidate and unmap the data payloads before waking
- * the waiting application. This guarantees the memory
- * regions are properly fenced from the server before the
- * application accesses the data. It also ensures proper
- * send flow control: waking the next RPC waits until this
- * RPC has relinquished all its Send Queue entries.
- */
- if (!list_empty(&req->rl_registered))
- frwr_unmap_sync(r_xprt, &req->rl_registered);
-
- /* Ensure that any DMA mapped pages associated with
- * the Send of the RPC Call have been unmapped before
- * allowing the RPC to complete. This protects argument
- * memory not controlled by the RPC client from being
- * re-used before we're done with it.
- */
- if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
- r_xprt->rx_stats.reply_waits_for_send++;
- out_of_line_wait_on_bit(&req->rl_flags,
- RPCRDMA_REQ_F_TX_RESOURCES,
- bit_wait,
- TASK_UNINTERRUPTIBLE);
- }
-}
-
-/* Reply handling runs in the poll worker thread. Anything that
- * might wait is deferred to a separate workqueue.
- */
-void rpcrdma_deferred_completion(struct work_struct *work)
+static void rpcrdma_reply_done(struct kref *kref)
{
- struct rpcrdma_rep *rep =
- container_of(work, struct rpcrdma_rep, rr_work);
- struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
- struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+ struct rpcrdma_req *req =
+ container_of(kref, struct rpcrdma_req, rl_kref);
- trace_xprtrdma_defer_cmp(rep);
- if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
- frwr_reminv(rep, &req->rl_registered);
- rpcrdma_release_rqst(r_xprt, req);
- rpcrdma_complete_rqst(rep);
+ rpcrdma_complete_rqst(req->rl_reply);
}
-/* Process received RPC/RDMA messages.
+/**
+ * rpcrdma_reply_handler - Process received RPC/RDMA messages
+ * @rep: Incoming rpcrdma_rep object to process
*
* Errors must result in the RPC task either being awakened, or
* allowed to timeout, to discover the errors at that time.
@@ -1360,10 +1332,10 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
else if (credits > buf->rb_max_requests)
credits = buf->rb_max_requests;
if (buf->rb_credits != credits) {
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
buf->rb_credits = credits;
xprt->cwnd = credits << RPC_CWNDSHIFT;
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
req = rpcr_to_rdmar(rqst);
@@ -1373,10 +1345,16 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
}
req->rl_reply = rep;
rep->rr_rqst = rqst;
- clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
- queue_work(buf->rb_completion_wq, &rep->rr_work);
+
+ if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
+ frwr_reminv(rep, &req->rl_registered);
+ if (!list_empty(&req->rl_registered))
+ frwr_unmap_async(r_xprt, req);
+ /* LocalInv completion will complete the RPC */
+ else
+ kref_put(&req->rl_kref, rpcrdma_reply_done);
return;
out_badversion:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index bed57d8b5c19..d1fcc41d5eb5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -72,9 +72,9 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
credits = r_xprt->rx_buf.rb_bc_max_requests;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt->cwnd = credits << RPC_CWNDSHIFT;
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
spin_lock(&xprt->queue_lock);
ret = 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 0004535c0188..3fe665152d95 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -226,9 +226,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
* Enqueue the new transport on the accept queue of the listening
* transport
*/
- spin_lock_bh(&listen_xprt->sc_lock);
+ spin_lock(&listen_xprt->sc_lock);
list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q);
- spin_unlock_bh(&listen_xprt->sc_lock);
+ spin_unlock(&listen_xprt->sc_lock);
set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags);
svc_xprt_enqueue(&listen_xprt->sc_xprt);
@@ -401,7 +401,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
clear_bit(XPT_CONN, &xprt->xpt_flags);
/* Get the next entry off the accept list */
- spin_lock_bh(&listen_rdma->sc_lock);
+ spin_lock(&listen_rdma->sc_lock);
if (!list_empty(&listen_rdma->sc_accept_q)) {
newxprt = list_entry(listen_rdma->sc_accept_q.next,
struct svcxprt_rdma, sc_accept_q);
@@ -409,7 +409,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
}
if (!list_empty(&listen_rdma->sc_accept_q))
set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags);
- spin_unlock_bh(&listen_rdma->sc_lock);
+ spin_unlock(&listen_rdma->sc_lock);
if (!newxprt)
return NULL;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 1f73a6a7e43c..2ec349ed4770 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -80,7 +80,6 @@ static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
static unsigned int min_inline_size = RPCRDMA_MIN_INLINE;
static unsigned int max_inline_size = RPCRDMA_MAX_INLINE;
-static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1;
@@ -122,7 +121,7 @@ static struct ctl_table xr_tunables_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &max_padding,
},
{
@@ -298,6 +297,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
module_put(THIS_MODULE);
}
+/* 60 second timeout, no retries */
static const struct rpc_timeout xprt_rdma_default_timeout = {
.to_initval = 60 * HZ,
.to_maxval = 60 * HZ,
@@ -323,8 +323,9 @@ xprt_setup_rdma(struct xprt_create *args)
if (!xprt)
return ERR_PTR(-ENOMEM);
- /* 60 second timeout, no retries */
xprt->timeout = &xprt_rdma_default_timeout;
+ xprt->connect_timeout = xprt->timeout->to_initval;
+ xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
xprt->bind_timeout = RPCRDMA_BIND_TO;
xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
@@ -487,31 +488,64 @@ xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
}
/**
- * xprt_rdma_connect - try to establish a transport connection
+ * xprt_rdma_set_connect_timeout - set timeouts for establishing a connection
+ * @xprt: controlling transport instance
+ * @connect_timeout: reconnect timeout after client disconnects
+ * @reconnect_timeout: reconnect timeout after server disconnects
+ *
+ */
+static void xprt_rdma_tcp_set_connect_timeout(struct rpc_xprt *xprt,
+ unsigned long connect_timeout,
+ unsigned long reconnect_timeout)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+ trace_xprtrdma_op_set_cto(r_xprt, connect_timeout, reconnect_timeout);
+
+ spin_lock(&xprt->transport_lock);
+
+ if (connect_timeout < xprt->connect_timeout) {
+ struct rpc_timeout to;
+ unsigned long initval;
+
+ to = *xprt->timeout;
+ initval = connect_timeout;
+ if (initval < RPCRDMA_INIT_REEST_TO << 1)
+ initval = RPCRDMA_INIT_REEST_TO << 1;
+ to.to_initval = initval;
+ to.to_maxval = initval;
+ r_xprt->rx_timeout = to;
+ xprt->timeout = &r_xprt->rx_timeout;
+ xprt->connect_timeout = connect_timeout;
+ }
+
+ if (reconnect_timeout < xprt->max_reconnect_timeout)
+ xprt->max_reconnect_timeout = reconnect_timeout;
+
+ spin_unlock(&xprt->transport_lock);
+}
+
+/**
+ * xprt_rdma_connect - schedule an attempt to reconnect
* @xprt: transport state
- * @task: RPC scheduler context
+ * @task: RPC scheduler context (unused)
*
*/
static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ unsigned long delay;
trace_xprtrdma_op_connect(r_xprt);
+
+ delay = 0;
if (r_xprt->rx_ep.rep_connected != 0) {
- /* Reconnect */
- schedule_delayed_work(&r_xprt->rx_connect_worker,
- xprt->reestablish_timeout);
- xprt->reestablish_timeout <<= 1;
- if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
- xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
- else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
- xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
- } else {
- schedule_delayed_work(&r_xprt->rx_connect_worker, 0);
- if (!RPC_IS_ASYNC(task))
- flush_delayed_work(&r_xprt->rx_connect_worker);
+ delay = xprt_reconnect_delay(xprt);
+ xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
}
+ queue_delayed_work(xprtiod_workqueue, &r_xprt->rx_connect_worker,
+ delay);
}
/**
@@ -550,8 +584,11 @@ out_sleep:
static void
xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
{
+ struct rpcrdma_xprt *r_xprt =
+ container_of(xprt, struct rpcrdma_xprt, rx_xprt);
+
memset(rqst, 0, sizeof(*rqst));
- rpcrdma_buffer_put(rpcr_to_rdmar(rqst));
+ rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
rpc_wake_up_next(&xprt->backlog);
}
@@ -618,9 +655,16 @@ xprt_rdma_free(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
- rpcrdma_release_rqst(r_xprt, req);
trace_xprtrdma_op_free(task, req);
+
+ if (!list_empty(&req->rl_registered))
+ frwr_unmap_sync(r_xprt, req);
+
+ /* XXX: If the RPC is completing because of a signal and
+ * not because a reply was received, we ought to ensure
+ * that the Send completion has fired, so that memory
+ * involved with the Send is not still visible to the NIC.
+ */
}
/**
@@ -667,7 +711,6 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
goto drop_connection;
rqst->rq_xtime = ktime_get();
- __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
@@ -760,6 +803,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
.send_request = xprt_rdma_send_request,
.close = xprt_rdma_close,
.destroy = xprt_rdma_destroy,
+ .set_connect_timeout = xprt_rdma_tcp_set_connect_timeout,
.print_stats = xprt_rdma_print_stats,
.enable_swap = xprt_rdma_enable_swap,
.disable_swap = xprt_rdma_disable_swap,
@@ -767,6 +811,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
.bc_setup = xprt_rdma_bc_setup,
.bc_maxpayload = xprt_rdma_bc_maxpayload,
+ .bc_num_slots = xprt_rdma_bc_max_slots,
.bc_free_rqst = xprt_rdma_bc_free_rqst,
.bc_destroy = xprt_rdma_bc_destroy,
#endif
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 84bb37924540..805b1f35e1ca 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -89,14 +89,12 @@ static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
*/
static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
{
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
/* Flush Receives, then wait for deferred Reply work
* to complete.
*/
ib_drain_rq(ia->ri_id->qp);
- drain_workqueue(buf->rb_completion_wq);
/* Deferred Reply processing might have scheduled
* local invalidations.
@@ -901,7 +899,7 @@ out_emptyq:
* completions recently. This is a sign the Send Queue is
* backing up. Cause the caller to pause and try again.
*/
- set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
+ xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
r_xprt->rx_stats.empty_sendctx_q++;
return NULL;
}
@@ -936,10 +934,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
/* Paired with READ_ONCE */
smp_store_release(&buf->rb_sc_tail, next_tail);
- if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) {
- smp_mb__after_atomic();
- xprt_write_space(&sc->sc_xprt->rx_xprt);
- }
+ xprt_write_space(&sc->sc_xprt->rx_xprt);
}
static void
@@ -977,8 +972,6 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
r_xprt->rx_stats.mrs_allocated += count;
spin_unlock(&buf->rb_mrlock);
trace_xprtrdma_createmrs(r_xprt, count);
-
- xprt_write_space(&r_xprt->rx_xprt);
}
static void
@@ -990,6 +983,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
rx_buf);
rpcrdma_mrs_create(r_xprt);
+ xprt_write_space(&r_xprt->rx_xprt);
}
/**
@@ -1025,7 +1019,6 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
if (!req->rl_recvbuf)
goto out4;
- req->rl_buffer = buffer;
INIT_LIST_HEAD(&req->rl_registered);
spin_lock(&buffer->rb_lock);
list_add(&req->rl_all, &buffer->rb_allreqs);
@@ -1042,9 +1035,9 @@ out1:
return NULL;
}
-static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp)
+static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
+ bool temp)
{
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
@@ -1055,27 +1048,22 @@ static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp)
DMA_FROM_DEVICE, GFP_KERNEL);
if (!rep->rr_rdmabuf)
goto out_free;
+
xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
rdmab_length(rep->rr_rdmabuf));
-
rep->rr_cqe.done = rpcrdma_wc_receive;
rep->rr_rxprt = r_xprt;
- INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
rep->rr_recv_wr.next = NULL;
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
rep->rr_temp = temp;
-
- spin_lock(&buf->rb_lock);
- list_add(&rep->rr_list, &buf->rb_recv_bufs);
- spin_unlock(&buf->rb_lock);
- return true;
+ return rep;
out_free:
kfree(rep);
out:
- return false;
+ return NULL;
}
/**
@@ -1089,7 +1077,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
int i, rc;
- buf->rb_flags = 0;
buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests;
buf->rb_bc_srv_max_requests = 0;
spin_lock_init(&buf->rb_mrlock);
@@ -1122,15 +1109,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
if (rc)
goto out;
- buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s",
- WQ_MEM_RECLAIM | WQ_HIGHPRI,
- 0,
- r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]);
- if (!buf->rb_completion_wq) {
- rc = -ENOMEM;
- goto out;
- }
-
return 0;
out:
rpcrdma_buffer_destroy(buf);
@@ -1204,11 +1182,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
cancel_delayed_work_sync(&buf->rb_refresh_worker);
- if (buf->rb_completion_wq) {
- destroy_workqueue(buf->rb_completion_wq);
- buf->rb_completion_wq = NULL;
- }
-
rpcrdma_sendctxs_destroy(buf);
while (!list_empty(&buf->rb_recv_bufs)) {
@@ -1325,13 +1298,12 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
/**
* rpcrdma_buffer_put - Put request/reply buffers back into pool
+ * @buffers: buffer pool
* @req: object to return
*
*/
-void
-rpcrdma_buffer_put(struct rpcrdma_req *req)
+void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
{
- struct rpcrdma_buffer *buffers = req->rl_buffer;
struct rpcrdma_rep *rep = req->rl_reply;
req->rl_reply = NULL;
@@ -1484,8 +1456,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
int rc;
- if (!ep->rep_send_count ||
- test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
+ if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) {
send_wr->send_flags |= IB_SEND_SIGNALED;
ep->rep_send_count = ep->rep_send_batch;
} else {
@@ -1505,11 +1476,13 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
- struct ib_recv_wr *wr, *bad_wr;
+ struct ib_recv_wr *i, *wr, *bad_wr;
+ struct rpcrdma_rep *rep;
int needed, count, rc;
rc = 0;
count = 0;
+
needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
if (ep->rep_receive_count > needed)
goto out;
@@ -1517,51 +1490,65 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
if (!temp)
needed += RPCRDMA_MAX_RECV_BATCH;
- count = 0;
+ /* fast path: all needed reps can be found on the free list */
wr = NULL;
+ spin_lock(&buf->rb_lock);
while (needed) {
- struct rpcrdma_regbuf *rb;
- struct rpcrdma_rep *rep;
-
- spin_lock(&buf->rb_lock);
rep = list_first_entry_or_null(&buf->rb_recv_bufs,
struct rpcrdma_rep, rr_list);
- if (likely(rep))
- list_del(&rep->rr_list);
- spin_unlock(&buf->rb_lock);
- if (!rep) {
- if (!rpcrdma_rep_create(r_xprt, temp))
- break;
- continue;
- }
+ if (!rep)
+ break;
- rb = rep->rr_rdmabuf;
- if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) {
- rpcrdma_recv_buffer_put(rep);
+ list_del(&rep->rr_list);
+ rep->rr_recv_wr.next = wr;
+ wr = &rep->rr_recv_wr;
+ --needed;
+ }
+ spin_unlock(&buf->rb_lock);
+
+ while (needed) {
+ rep = rpcrdma_rep_create(r_xprt, temp);
+ if (!rep)
break;
- }
- trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
rep->rr_recv_wr.next = wr;
wr = &rep->rr_recv_wr;
- ++count;
--needed;
}
- if (!count)
+ if (!wr)
goto out;
+ for (i = wr; i; i = i->next) {
+ rep = container_of(i, struct rpcrdma_rep, rr_recv_wr);
+
+ if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
+ goto release_wrs;
+
+ trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
+ ++count;
+ }
+
rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
+out:
+ trace_xprtrdma_post_recvs(r_xprt, count, rc);
if (rc) {
- for (wr = bad_wr; wr; wr = wr->next) {
+ for (wr = bad_wr; wr;) {
struct rpcrdma_rep *rep;
rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
+ wr = wr->next;
rpcrdma_recv_buffer_put(rep);
--count;
}
}
ep->rep_receive_count += count;
-out:
- trace_xprtrdma_post_recvs(r_xprt, count, rc);
+ return;
+
+release_wrs:
+ for (i = wr; i;) {
+ rep = container_of(i, struct rpcrdma_rep, rr_recv_wr);
+ i = i->next;
+ rpcrdma_recv_buffer_put(rep);
+ }
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index d1e0749bcbc4..92ce09fcea74 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -44,7 +44,8 @@
#include <linux/wait.h> /* wait_queue_head_t, etc */
#include <linux/spinlock.h> /* spinlock_t, etc */
-#include <linux/atomic.h> /* atomic_t, etc */
+#include <linux/atomic.h> /* atomic_t, etc */
+#include <linux/kref.h> /* struct kref */
#include <linux/workqueue.h> /* struct work_struct */
#include <rdma/rdma_cm.h> /* RDMA connection api */
@@ -202,10 +203,9 @@ struct rpcrdma_rep {
bool rr_temp;
struct rpcrdma_regbuf *rr_rdmabuf;
struct rpcrdma_xprt *rr_rxprt;
- struct work_struct rr_work;
+ struct rpc_rqst *rr_rqst;
struct xdr_buf rr_hdrbuf;
struct xdr_stream rr_stream;
- struct rpc_rqst *rr_rqst;
struct list_head rr_list;
struct ib_recv_wr rr_recv_wr;
};
@@ -240,18 +240,12 @@ struct rpcrdma_sendctx {
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*/
-enum rpcrdma_frwr_state {
- FRWR_IS_INVALID, /* ready to be used */
- FRWR_IS_VALID, /* in use */
- FRWR_FLUSHED_FR, /* flushed FASTREG WR */
- FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
-};
-
+struct rpcrdma_req;
struct rpcrdma_frwr {
struct ib_mr *fr_mr;
struct ib_cqe fr_cqe;
- enum rpcrdma_frwr_state fr_state;
struct completion fr_linv_done;
+ struct rpcrdma_req *fr_req;
union {
struct ib_reg_wr fr_regwr;
struct ib_send_wr fr_invwr;
@@ -326,7 +320,6 @@ struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
struct rpc_rqst rl_slot;
- struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream;
struct xdr_buf rl_hdrbuf;
@@ -336,18 +329,12 @@ struct rpcrdma_req {
struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
struct list_head rl_all;
- unsigned long rl_flags;
+ struct kref rl_kref;
struct list_head rl_registered; /* registered segments */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
};
-/* rl_flags */
-enum {
- RPCRDMA_REQ_F_PENDING = 0,
- RPCRDMA_REQ_F_TX_RESOURCES,
-};
-
static inline struct rpcrdma_req *
rpcr_to_rdmar(const struct rpc_rqst *rqst)
{
@@ -391,22 +378,15 @@ struct rpcrdma_buffer {
struct list_head rb_recv_bufs;
struct list_head rb_allreqs;
- unsigned long rb_flags;
u32 rb_max_requests;
u32 rb_credits; /* most recent credit grant */
u32 rb_bc_srv_max_requests;
u32 rb_bc_max_requests;
- struct workqueue_struct *rb_completion_wq;
struct delayed_work rb_refresh_worker;
};
-/* rb_flags */
-enum {
- RPCRDMA_BUF_F_EMPTY_SCQ = 0,
-};
-
/*
* Statistics for RPCRDMA
*/
@@ -452,6 +432,7 @@ struct rpcrdma_xprt {
struct rpcrdma_ep rx_ep;
struct rpcrdma_buffer rx_buf;
struct delayed_work rx_connect_worker;
+ struct rpc_timeout rx_timeout;
struct rpcrdma_stats rx_stats;
};
@@ -518,7 +499,8 @@ rpcrdma_mr_recycle(struct rpcrdma_mr *mr)
}
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
-void rpcrdma_buffer_put(struct rpcrdma_req *);
+void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
+ struct rpcrdma_req *req);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
@@ -564,6 +546,7 @@ rpcrdma_data_dir(bool writing)
/* Memory registration calls xprtrdma/frwr_ops.c
*/
bool frwr_is_supported(struct ib_device *device);
+void frwr_reset(struct rpcrdma_req *req);
int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
void frwr_release_mr(struct rpcrdma_mr *mr);
@@ -574,8 +557,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr **mr);
int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
-void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt,
- struct list_head *mrs);
+void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
+void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
/*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
@@ -598,9 +581,6 @@ int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
-void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
- struct rpcrdma_req *req);
-void rpcrdma_deferred_completion(struct work_struct *work);
static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
{
@@ -625,6 +605,7 @@ void xprt_rdma_cleanup(void);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
+unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 36652352a38c..e2176c167a57 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -880,7 +880,7 @@ static int xs_nospace(struct rpc_rqst *req)
req->rq_slen);
/* Protect against races with write_space */
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
@@ -890,7 +890,7 @@ static int xs_nospace(struct rpc_rqst *req)
} else
ret = -ENOTCONN;
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
/* Race breaker in case memory is freed before above code is called */
if (ret == -EAGAIN) {
@@ -909,6 +909,7 @@ static int xs_nospace(struct rpc_rqst *req)
static void
xs_stream_prepare_request(struct rpc_rqst *req)
{
+ xdr_free_bvec(&req->rq_rcv_buf);
req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
}
@@ -1211,6 +1212,15 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+ clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
+ clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
+ clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
+}
+
+static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
+{
+ set_bit(nr, &transport->sock_state);
+ queue_work(xprtiod_workqueue, &transport->error_worker);
}
static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
@@ -1231,6 +1241,7 @@ static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
*/
static void xs_error_report(struct sock *sk)
{
+ struct sock_xprt *transport;
struct rpc_xprt *xprt;
int err;
@@ -1238,13 +1249,14 @@ static void xs_error_report(struct sock *sk)
if (!(xprt = xprt_from_sock(sk)))
goto out;
+ transport = container_of(xprt, struct sock_xprt, xprt);
err = -sk->sk_err;
if (err == 0)
goto out;
dprintk("RPC: xs_error_report client %p, error=%d...\n",
xprt, -err);
trace_rpc_socket_error(xprt, sk->sk_socket, err);
- xprt_wake_pending_tasks(xprt, err);
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR);
out:
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1333,6 +1345,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
cancel_delayed_work_sync(&transport->connect_worker);
xs_close(xprt);
cancel_work_sync(&transport->recv_worker);
+ cancel_work_sync(&transport->error_worker);
xs_xprt_free(xprt);
module_put(THIS_MODULE);
}
@@ -1386,9 +1399,9 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
}
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt_adjust_cwnd(xprt, task, copied);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
spin_lock(&xprt->queue_lock);
xprt_complete_rqst(task, copied);
__UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
@@ -1498,7 +1511,6 @@ static void xs_tcp_state_change(struct sock *sk)
trace_rpc_socket_state_change(xprt, sk->sk_socket);
switch (sk->sk_state) {
case TCP_ESTABLISHED:
- spin_lock(&xprt->transport_lock);
if (!xprt_test_and_set_connected(xprt)) {
xprt->connect_cookie++;
clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
@@ -1507,9 +1519,8 @@ static void xs_tcp_state_change(struct sock *sk)
xprt->stat.connect_count++;
xprt->stat.connect_time += (long)jiffies -
xprt->stat.connect_start;
- xprt_wake_pending_tasks(xprt, -EAGAIN);
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_PENDING);
}
- spin_unlock(&xprt->transport_lock);
break;
case TCP_FIN_WAIT1:
/* The client initiated a shutdown of the socket */
@@ -1525,7 +1536,7 @@ static void xs_tcp_state_change(struct sock *sk)
/* The server initiated a shutdown of the socket */
xprt->connect_cookie++;
clear_bit(XPRT_CONNECTED, &xprt->state);
- xs_tcp_force_close(xprt);
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
/* fall through */
case TCP_CLOSING:
/*
@@ -1547,7 +1558,7 @@ static void xs_tcp_state_change(struct sock *sk)
xprt_clear_connecting(xprt);
clear_bit(XPRT_CLOSING, &xprt->state);
/* Trigger the socket release */
- xs_tcp_force_close(xprt);
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
}
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -1556,6 +1567,7 @@ static void xs_tcp_state_change(struct sock *sk)
static void xs_write_space(struct sock *sk)
{
struct socket_wq *wq;
+ struct sock_xprt *transport;
struct rpc_xprt *xprt;
if (!sk->sk_socket)
@@ -1564,13 +1576,14 @@ static void xs_write_space(struct sock *sk)
if (unlikely(!(xprt = xprt_from_sock(sk))))
return;
+ transport = container_of(xprt, struct sock_xprt, xprt);
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
goto out;
- if (xprt_write_space(xprt))
- sk->sk_write_pending--;
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE);
+ sk->sk_write_pending--;
out:
rcu_read_unlock();
}
@@ -1664,9 +1677,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t
*/
static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
xprt_adjust_cwnd(xprt, task, -ETIMEDOUT);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
static int xs_get_random_port(void)
@@ -2201,13 +2214,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
unsigned int opt_on = 1;
unsigned int timeo;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
keepcnt = xprt->timeout->to_retries + 1;
timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
(xprt->timeout->to_retries + 1);
clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
/* TCP Keepalive options */
kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2232,7 +2245,7 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
struct rpc_timeout to;
unsigned long initval;
- spin_lock_bh(&xprt->transport_lock);
+ spin_lock(&xprt->transport_lock);
if (reconnect_timeout < xprt->max_reconnect_timeout)
xprt->max_reconnect_timeout = reconnect_timeout;
if (connect_timeout < xprt->connect_timeout) {
@@ -2249,7 +2262,7 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
xprt->connect_timeout = connect_timeout;
}
set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
- spin_unlock_bh(&xprt->transport_lock);
+ spin_unlock(&xprt->transport_lock);
}
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
@@ -2402,25 +2415,6 @@ out:
xprt_wake_pending_tasks(xprt, status);
}
-static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt)
-{
- unsigned long start, now = jiffies;
-
- start = xprt->stat.connect_start + xprt->reestablish_timeout;
- if (time_after(start, now))
- return start - now;
- return 0;
-}
-
-static void xs_reconnect_backoff(struct rpc_xprt *xprt)
-{
- xprt->reestablish_timeout <<= 1;
- if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
- xprt->reestablish_timeout = xprt->max_reconnect_timeout;
- if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
- xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
-}
-
/**
* xs_connect - connect a socket to a remote endpoint
* @xprt: pointer to transport structure
@@ -2450,8 +2444,8 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
/* Start by resetting any existing state */
xs_reset_transport(transport);
- delay = xs_reconnect_delay(xprt);
- xs_reconnect_backoff(xprt);
+ delay = xprt_reconnect_delay(xprt);
+ xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO);
} else
dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
@@ -2461,6 +2455,56 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
delay);
}
+static void xs_wake_disconnect(struct sock_xprt *transport)
+{
+ if (test_and_clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state))
+ xs_tcp_force_close(&transport->xprt);
+}
+
+static void xs_wake_write(struct sock_xprt *transport)
+{
+ if (test_and_clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state))
+ xprt_write_space(&transport->xprt);
+}
+
+static void xs_wake_error(struct sock_xprt *transport)
+{
+ int sockerr;
+ int sockerr_len = sizeof(sockerr);
+
+ if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
+ return;
+ mutex_lock(&transport->recv_mutex);
+ if (transport->sock == NULL)
+ goto out;
+ if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
+ goto out;
+ if (kernel_getsockopt(transport->sock, SOL_SOCKET, SO_ERROR,
+ (char *)&sockerr, &sockerr_len) != 0)
+ goto out;
+ if (sockerr < 0)
+ xprt_wake_pending_tasks(&transport->xprt, sockerr);
+out:
+ mutex_unlock(&transport->recv_mutex);
+}
+
+static void xs_wake_pending(struct sock_xprt *transport)
+{
+ if (test_and_clear_bit(XPRT_SOCK_WAKE_PENDING, &transport->sock_state))
+ xprt_wake_pending_tasks(&transport->xprt, -EAGAIN);
+}
+
+static void xs_error_handle(struct work_struct *work)
+{
+ struct sock_xprt *transport = container_of(work,
+ struct sock_xprt, error_worker);
+
+ xs_wake_disconnect(transport);
+ xs_wake_write(transport);
+ xs_wake_error(transport);
+ xs_wake_pending(transport);
+}
+
/**
* xs_local_print_stats - display AF_LOCAL socket-specifc stats
* @xprt: rpc_xprt struct containing statistics
@@ -2745,6 +2789,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
#ifdef CONFIG_SUNRPC_BACKCHANNEL
.bc_setup = xprt_setup_bc,
.bc_maxpayload = xs_tcp_bc_maxpayload,
+ .bc_num_slots = xprt_bc_max_slots,
.bc_free_rqst = xprt_free_bc_rqst,
.bc_destroy = xprt_destroy_bc,
#endif
@@ -2873,6 +2918,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
xprt->timeout = &xs_local_default_timeout;
INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+ INIT_WORK(&transport->error_worker, xs_error_handle);
INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket);
switch (sun->sun_family) {
@@ -2943,6 +2989,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
xprt->timeout = &xs_udp_default_timeout;
INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn);
+ INIT_WORK(&transport->error_worker, xs_error_handle);
INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket);
switch (addr->sa_family) {
@@ -3024,6 +3071,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
(xprt->timeout->to_retries + 1);
INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+ INIT_WORK(&transport->error_worker, xs_error_handle);
INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
switch (addr->sa_family) {
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index b88d48d00913..0f1eaed1bd1b 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -75,6 +75,7 @@ void tipc_set_node_addr(struct net *net, u32 addr)
tipc_set_node_id(net, node_id);
}
tn->trial_addr = addr;
+ tn->addr_trial_end = jiffies;
pr_info("32-bit node address hash set to %x\n", addr);
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 66d3a07bc571..c2c5c53cad22 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -106,8 +106,6 @@ struct tipc_stats {
* @transmitq: queue for sent, non-acked messages
* @backlogq: queue for messages waiting to be sent
* @snt_nxt: next sequence number to use for outbound messages
- * @prev_from: sequence number of most previous retransmission request
- * @stale_limit: time when repeated identical retransmits must force link reset
* @ackers: # of peers that needs to ack each packet before it can be released
* @acked: # last packet acked by a certain peer. Used for broadcast.
* @rcv_nxt: next sequence number to expect for inbound messages
@@ -164,9 +162,7 @@ struct tipc_link {
u16 limit;
} backlog[5];
u16 snd_nxt;
- u16 prev_from;
u16 window;
- unsigned long stale_limit;
/* Reception */
u16 rcv_nxt;
@@ -1044,47 +1040,53 @@ static void tipc_link_advance_backlog(struct tipc_link *l,
* link_retransmit_failure() - Detect repeated retransmit failures
* @l: tipc link sender
* @r: tipc link receiver (= l in case of unicast)
- * @from: seqno of the 1st packet in retransmit request
* @rc: returned code
*
* Return: true if the repeated retransmit failures happens, otherwise
* false
*/
static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
- u16 from, int *rc)
+ int *rc)
{
struct sk_buff *skb = skb_peek(&l->transmq);
struct tipc_msg *hdr;
if (!skb)
return false;
- hdr = buf_msg(skb);
- /* Detect repeated retransmit failures on same packet */
- if (r->prev_from != from) {
- r->prev_from = from;
- r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
- } else if (time_after(jiffies, r->stale_limit)) {
- pr_warn("Retransmission failure on link <%s>\n", l->name);
- link_print(l, "State of link ");
- pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
- msg_user(hdr), msg_type(hdr), msg_size(hdr),
- msg_errcode(hdr));
- pr_info("sqno %u, prev: %x, src: %x\n",
- msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
-
- trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
- trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
- trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
+ if (!TIPC_SKB_CB(skb)->retr_cnt)
+ return false;
- if (link_is_bc_sndlink(l))
- *rc = TIPC_LINK_DOWN_EVT;
+ if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
+ msecs_to_jiffies(r->tolerance)))
+ return false;
+
+ hdr = buf_msg(skb);
+ if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr)))
+ return false;
+ pr_warn("Retransmission failure on link <%s>\n", l->name);
+ link_print(l, "State of link ");
+ pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
+ msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
+ pr_info("sqno %u, prev: %x, dest: %x\n",
+ msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr));
+ pr_info("retr_stamp %d, retr_cnt %d\n",
+ jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp),
+ TIPC_SKB_CB(skb)->retr_cnt);
+
+ trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
+ trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
+ trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
+
+ if (link_is_bc_sndlink(l)) {
+ r->state = LINK_RESET;
+ *rc = TIPC_LINK_DOWN_EVT;
+ } else {
*rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
- return true;
}
- return false;
+ return true;
}
/* tipc_link_bc_retrans() - retransmit zero or more packets
@@ -1110,7 +1112,7 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
trace_tipc_link_retrans(r, from, to, &l->transmq);
- if (link_retransmit_failure(l, r, from, &rc))
+ if (link_retransmit_failure(l, r, &rc))
return rc;
skb_queue_walk(&l->transmq, skb) {
@@ -1119,11 +1121,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
continue;
if (more(msg_seqno(hdr), to))
break;
- if (link_is_bc_sndlink(l)) {
- if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
- continue;
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
- }
+
+ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
+ continue;
+ TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
_skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC);
if (!_skb)
return 0;
@@ -1133,6 +1134,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
_skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, _skb);
l->stats.retransmitted++;
+
+ /* Increase actual retrans counter & mark first time */
+ if (!TIPC_SKB_CB(skb)->retr_cnt++)
+ TIPC_SKB_CB(skb)->retr_stamp = jiffies;
}
return 0;
}
@@ -1357,12 +1362,10 @@ static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
struct tipc_msg *hdr;
u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
u16 ack = l->rcv_nxt - 1;
+ bool passed = false;
u16 seqno, n = 0;
int rc = 0;
- if (gap && link_retransmit_failure(l, l, acked + 1, &rc))
- return rc;
-
skb_queue_walk_safe(&l->transmq, skb, tmp) {
seqno = buf_seqno(skb);
@@ -1372,12 +1375,17 @@ next_gap_ack:
__skb_unlink(skb, &l->transmq);
kfree_skb(skb);
} else if (less_eq(seqno, acked + gap)) {
- /* retransmit skb */
+ /* First, check if repeated retrans failures occurs? */
+ if (!passed && link_retransmit_failure(l, l, &rc))
+ return rc;
+ passed = true;
+
+ /* retransmit skb if unrestricted*/
if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
continue;
TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
-
- _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
+ _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE,
+ GFP_ATOMIC);
if (!_skb)
continue;
hdr = buf_msg(_skb);
@@ -1386,6 +1394,10 @@ next_gap_ack:
_skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, _skb);
l->stats.retransmitted++;
+
+ /* Increase actual retrans counter & mark first time */
+ if (!TIPC_SKB_CB(skb)->retr_cnt++)
+ TIPC_SKB_CB(skb)->retr_stamp = jiffies;
} else {
/* retry with Gap ACK blocks if any */
if (!ga || n >= ga->gack_cnt)
@@ -2577,7 +2589,7 @@ int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf)
i += scnprintf(buf + i, sz - i, " %x", l->peer_caps);
i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt);
i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt);
- i += scnprintf(buf + i, sz - i, " %u", l->prev_from);
+ i += scnprintf(buf + i, sz - i, " %u", 0);
i += scnprintf(buf + i, sz - i, " %u", 0);
i += scnprintf(buf + i, sz - i, " %u", l->acked);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index da509f0eb9ca..d7ebc9e955f6 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -102,13 +102,15 @@ struct plist;
#define TIPC_MEDIA_INFO_OFFSET 5
struct tipc_skb_cb {
- u32 bytes_read;
- u32 orig_member;
struct sk_buff *tail;
unsigned long nxt_retr;
- bool validated;
+ unsigned long retr_stamp;
+ u32 bytes_read;
+ u32 orig_member;
u16 chain_imp;
u16 ackers;
+ u16 retr_cnt;
+ bool validated;
};
#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 61219f0b9677..44abc8e9c990 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -190,7 +190,7 @@ void tipc_named_node_up(struct net *net, u32 dnode)
struct name_table *nt = tipc_name_table(net);
struct sk_buff_head head;
- __skb_queue_head_init(&head);
+ skb_queue_head_init(&head);
read_lock_bh(&nt->cluster_scope_lock);
named_distribute(net, &head, dnode, &nt->cluster_scope);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index d86030ef1232..e135d4e11231 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -55,6 +55,7 @@ struct tipc_nl_compat_msg {
int rep_type;
int rep_size;
int req_type;
+ int req_size;
struct net *net;
struct sk_buff *rep;
struct tlv_desc *req;
@@ -257,7 +258,8 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
int err;
struct sk_buff *arg;
- if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type))
+ if (msg->req_type && (!msg->req_size ||
+ !TLV_CHECK_TYPE(msg->req, msg->req_type)))
return -EINVAL;
msg->rep = tipc_tlv_alloc(msg->rep_size);
@@ -354,7 +356,8 @@ static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
{
int err;
- if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type))
+ if (msg->req_type && (!msg->req_size ||
+ !TLV_CHECK_TYPE(msg->req, msg->req_type)))
return -EINVAL;
err = __tipc_nl_compat_doit(cmd, msg);
@@ -1278,8 +1281,8 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
goto send;
}
- len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN);
- if (!len || !TLV_OK(msg.req, len)) {
+ msg.req_size = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN);
+ if (msg.req_size && !TLV_OK(msg.req, msg.req_size)) {
msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED);
err = -EOPNOTSUPP;
goto send;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 324a1f91b394..3a5be1d7e572 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1807,6 +1807,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
__skb_queue_head_init(&xmitq);
/* Ensure message is well-formed before touching the header */
+ TIPC_SKB_CB(skb)->validated = false;
if (unlikely(!tipc_msg_validate(&skb)))
goto discard;
hdr = buf_msg(skb);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index dd8537f988c4..83ae41d7e554 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -485,9 +485,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
tsk_set_unreturnable(tsk, true);
if (sock->type == SOCK_DGRAM)
tsk_set_unreliable(tsk, true);
- __skb_queue_head_init(&tsk->mc_method.deferredq);
}
-
+ __skb_queue_head_init(&tsk->mc_method.deferredq);
trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " ");
return 0;
}
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 9df82a573aa7..6159d327db76 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -38,8 +38,6 @@
#include <linux/sysctl.h>
-static int zero;
-static int one = 1;
static struct ctl_table_header *tipc_ctl_hdr;
static struct ctl_table tipc_table[] = {
@@ -49,7 +47,7 @@ static struct ctl_table tipc_table[] = {
.maxlen = sizeof(sysctl_tipc_rmem),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "named_timeout",
@@ -57,7 +55,7 @@ static struct ctl_table tipc_table[] = {
.maxlen = sizeof(sysctl_tipc_named_timeout),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
},
{
.procname = "sk_filter",
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index f345662890a6..ca8ac96d22a9 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -476,7 +476,7 @@ static void tipc_topsrv_accept(struct work_struct *work)
}
}
-/* tipc_toprsv_listener_data_ready - interrupt callback with connection request
+/* tipc_topsrv_listener_data_ready - interrupt callback with connection request
* The queued job is launched into tipc_topsrv_accept()
*/
static void tipc_topsrv_listener_data_ready(struct sock *sk)
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 7c0b2b778703..43922d86e510 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -373,9 +373,9 @@ static int tls_push_data(struct sock *sk,
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
- int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
struct tls_record_info *record = ctx->open_record;
+ int tls_push_record_flags;
struct page_frag *pfrag;
size_t orig_size = size;
u32 max_open_record_len;
@@ -390,6 +390,9 @@ static int tls_push_data(struct sock *sk,
if (sk->sk_err)
return -sk->sk_err;
+ flags |= MSG_SENDPAGE_DECRYPTED;
+ tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
+
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
if (tls_is_partially_sent_record(tls_ctx)) {
rc = tls_push_partial_record(sk, tls_ctx, flags);
@@ -576,7 +579,9 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
gfp_t sk_allocation = sk->sk_allocation;
sk->sk_allocation = GFP_ATOMIC;
- tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL);
+ tls_push_partial_record(sk, ctx,
+ MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_SENDPAGE_DECRYPTED);
sk->sk_allocation = sk_allocation;
}
}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 4674e57e66b0..43252a801c3f 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -261,24 +261,9 @@ void tls_ctx_free(struct tls_context *ctx)
kfree(ctx);
}
-static void tls_sk_proto_close(struct sock *sk, long timeout)
+static void tls_sk_proto_cleanup(struct sock *sk,
+ struct tls_context *ctx, long timeo)
{
- struct tls_context *ctx = tls_get_ctx(sk);
- long timeo = sock_sndtimeo(sk, 0);
- void (*sk_proto_close)(struct sock *sk, long timeout);
- bool free_ctx = false;
-
- lock_sock(sk);
- sk_proto_close = ctx->sk_proto_close;
-
- if (ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD)
- goto skip_tx_cleanup;
-
- if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) {
- free_ctx = true;
- goto skip_tx_cleanup;
- }
-
if (unlikely(sk->sk_write_pending) &&
!wait_on_pending_writer(sk, &timeo))
tls_handle_open_record(sk, 0);
@@ -287,7 +272,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
if (ctx->tx_conf == TLS_SW) {
kfree(ctx->tx.rec_seq);
kfree(ctx->tx.iv);
- tls_sw_free_resources_tx(sk);
+ tls_sw_release_resources_tx(sk);
#ifdef CONFIG_TLS_DEVICE
} else if (ctx->tx_conf == TLS_HW) {
tls_device_free_resources_tx(sk);
@@ -295,26 +280,46 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
}
if (ctx->rx_conf == TLS_SW)
- tls_sw_free_resources_rx(sk);
+ tls_sw_release_resources_rx(sk);
#ifdef CONFIG_TLS_DEVICE
if (ctx->rx_conf == TLS_HW)
tls_device_offload_cleanup_rx(sk);
-
- if (ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW) {
-#else
- {
#endif
- tls_ctx_free(ctx);
- ctx = NULL;
- }
+}
-skip_tx_cleanup:
+static void tls_sk_proto_close(struct sock *sk, long timeout)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tls_context *ctx = tls_get_ctx(sk);
+ long timeo = sock_sndtimeo(sk, 0);
+ bool free_ctx;
+
+ if (ctx->tx_conf == TLS_SW)
+ tls_sw_cancel_work_tx(ctx);
+
+ lock_sock(sk);
+ free_ctx = ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW;
+
+ if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE)
+ tls_sk_proto_cleanup(sk, ctx, timeo);
+
+ write_lock_bh(&sk->sk_callback_lock);
+ if (free_ctx)
+ icsk->icsk_ulp_data = NULL;
+ sk->sk_prot = ctx->sk_proto;
+ if (sk->sk_write_space == tls_write_space)
+ sk->sk_write_space = ctx->sk_write_space;
+ write_unlock_bh(&sk->sk_callback_lock);
release_sock(sk);
- sk_proto_close(sk, timeout);
- /* free ctx for TLS_HW_RECORD, used by tcp_set_state
- * for sk->sk_prot->unhash [tls_hw_unhash]
- */
+ if (ctx->tx_conf == TLS_SW)
+ tls_sw_free_ctx_tx(ctx);
+ if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW)
+ tls_sw_strparser_done(ctx);
+ if (ctx->rx_conf == TLS_SW)
+ tls_sw_free_ctx_rx(ctx);
+ ctx->sk_proto_close(sk, timeout);
+
if (free_ctx)
tls_ctx_free(ctx);
}
@@ -526,6 +531,8 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
{
#endif
rc = tls_set_sw_offload(sk, ctx, 1);
+ if (rc)
+ goto err_crypto_info;
conf = TLS_SW;
}
} else {
@@ -537,13 +544,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
{
#endif
rc = tls_set_sw_offload(sk, ctx, 0);
+ if (rc)
+ goto err_crypto_info;
conf = TLS_SW;
}
+ tls_sw_strparser_arm(sk, ctx);
}
- if (rc)
- goto err_crypto_info;
-
if (tx)
ctx->tx_conf = conf;
else
@@ -607,6 +614,7 @@ static struct tls_context *create_ctx(struct sock *sk)
ctx->setsockopt = sk->sk_prot->setsockopt;
ctx->getsockopt = sk->sk_prot->getsockopt;
ctx->sk_proto_close = sk->sk_prot->close;
+ ctx->unhash = sk->sk_prot->unhash;
return ctx;
}
@@ -764,7 +772,6 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_hw_hash;
prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_hw_unhash;
- prot[TLS_HW_RECORD][TLS_HW_RECORD].close = tls_sk_proto_close;
}
static int tls_init(struct sock *sk)
@@ -773,7 +780,7 @@ static int tls_init(struct sock *sk)
int rc = 0;
if (tls_hw_prot(sk))
- goto out;
+ return 0;
/* The TLS ulp is currently supported only for TCP sockets
* in ESTABLISHED state.
@@ -784,21 +791,38 @@ static int tls_init(struct sock *sk)
if (sk->sk_state != TCP_ESTABLISHED)
return -ENOTSUPP;
+ tls_build_proto(sk);
+
/* allocate tls context */
+ write_lock_bh(&sk->sk_callback_lock);
ctx = create_ctx(sk);
if (!ctx) {
rc = -ENOMEM;
goto out;
}
- tls_build_proto(sk);
ctx->tx_conf = TLS_BASE;
ctx->rx_conf = TLS_BASE;
+ ctx->sk_proto = sk->sk_prot;
update_sk_prot(sk, ctx);
out:
+ write_unlock_bh(&sk->sk_callback_lock);
return rc;
}
+static void tls_update(struct sock *sk, struct proto *p)
+{
+ struct tls_context *ctx;
+
+ ctx = tls_get_ctx(sk);
+ if (likely(ctx)) {
+ ctx->sk_proto_close = p->close;
+ ctx->sk_proto = p;
+ } else {
+ sk->sk_prot = p;
+ }
+}
+
void tls_register_device(struct tls_device *device)
{
spin_lock_bh(&device_spinlock);
@@ -819,6 +843,7 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
.name = "tls",
.owner = THIS_MODULE,
.init = tls_init,
+ .update = tls_update,
};
static int __init tls_register(void)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 53b4ad94e74a..91d21b048a9b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2054,7 +2054,16 @@ static void tls_data_ready(struct sock *sk)
}
}
-void tls_sw_free_resources_tx(struct sock *sk)
+void tls_sw_cancel_work_tx(struct tls_context *tls_ctx)
+{
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+
+ set_bit(BIT_TX_CLOSING, &ctx->tx_bitmask);
+ set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask);
+ cancel_delayed_work_sync(&ctx->tx_work.work);
+}
+
+void tls_sw_release_resources_tx(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
@@ -2065,11 +2074,6 @@ void tls_sw_free_resources_tx(struct sock *sk)
if (atomic_read(&ctx->encrypt_pending))
crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- release_sock(sk);
- cancel_delayed_work_sync(&ctx->tx_work.work);
- lock_sock(sk);
-
- /* Tx whatever records we can transmit and abandon the rest */
tls_tx_records(sk, -1);
/* Free up un-sent records in tx_list. First, free
@@ -2092,6 +2096,11 @@ void tls_sw_free_resources_tx(struct sock *sk)
crypto_free_aead(ctx->aead_send);
tls_free_open_rec(sk);
+}
+
+void tls_sw_free_ctx_tx(struct tls_context *tls_ctx)
+{
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
kfree(ctx);
}
@@ -2110,25 +2119,40 @@ void tls_sw_release_resources_rx(struct sock *sk)
skb_queue_purge(&ctx->rx_list);
crypto_free_aead(ctx->aead_recv);
strp_stop(&ctx->strp);
- write_lock_bh(&sk->sk_callback_lock);
- sk->sk_data_ready = ctx->saved_data_ready;
- write_unlock_bh(&sk->sk_callback_lock);
- release_sock(sk);
- strp_done(&ctx->strp);
- lock_sock(sk);
+ /* If tls_sw_strparser_arm() was not called (cleanup paths)
+ * we still want to strp_stop(), but sk->sk_data_ready was
+ * never swapped.
+ */
+ if (ctx->saved_data_ready) {
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_data_ready = ctx->saved_data_ready;
+ write_unlock_bh(&sk->sk_callback_lock);
+ }
}
}
-void tls_sw_free_resources_rx(struct sock *sk)
+void tls_sw_strparser_done(struct tls_context *tls_ctx)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- tls_sw_release_resources_rx(sk);
+ strp_done(&ctx->strp);
+}
+
+void tls_sw_free_ctx_rx(struct tls_context *tls_ctx)
+{
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
kfree(ctx);
}
+void tls_sw_free_resources_rx(struct sock *sk)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+
+ tls_sw_release_resources_rx(sk);
+ tls_sw_free_ctx_rx(tls_ctx);
+}
+
/* The work handler to transmitt the encrypted records in tx_list */
static void tx_work_handler(struct work_struct *work)
{
@@ -2137,11 +2161,17 @@ static void tx_work_handler(struct work_struct *work)
struct tx_work, work);
struct sock *sk = tx_work->sk;
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+ struct tls_sw_context_tx *ctx;
- if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+ if (unlikely(!tls_ctx))
return;
+ ctx = tls_sw_ctx_tx(tls_ctx);
+ if (test_bit(BIT_TX_CLOSING, &ctx->tx_bitmask))
+ return;
+
+ if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+ return;
lock_sock(sk);
tls_tx_records(sk, -1);
release_sock(sk);
@@ -2160,6 +2190,18 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
}
}
+void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
+{
+ struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx);
+
+ write_lock_bh(&sk->sk_callback_lock);
+ rx_ctx->saved_data_ready = sk->sk_data_ready;
+ sk->sk_data_ready = tls_data_ready;
+ write_unlock_bh(&sk->sk_callback_lock);
+
+ strp_check_rcv(&rx_ctx->strp);
+}
+
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -2357,13 +2399,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
cb.parse_msg = tls_read_size;
strp_init(&sw_ctx_rx->strp, sk, &cb);
-
- write_lock_bh(&sk->sk_callback_lock);
- sw_ctx_rx->saved_data_ready = sk->sk_data_ready;
- sk->sk_data_ready = tls_data_ready;
- write_unlock_bh(&sk->sk_callback_lock);
-
- strp_check_rcv(&sw_ctx_rx->strp);
}
goto out;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index f2084e3f7aa4..9d864ebeb7b3 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -312,6 +312,11 @@ static void hvs_close_connection(struct vmbus_channel *chan)
lock_sock(sk);
hvs_do_close_lock_held(vsock_sk(sk), true);
release_sock(sk);
+
+ /* Release the refcnt for the channel that's opened in
+ * hvs_open_connection().
+ */
+ sock_put(sk);
}
static void hvs_open_connection(struct vmbus_channel *chan)
@@ -407,6 +412,9 @@ static void hvs_open_connection(struct vmbus_channel *chan)
}
set_per_channel_state(chan, conn_from_host ? new : sk);
+
+ /* This reference will be dropped by hvs_close_connection(). */
+ sock_hold(conn_from_host ? new : sk);
vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
/* Set the pending send size to max packet size to always get
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 578cce4fbe6c..67f8360dfcee 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -166,7 +166,7 @@ config CFG80211_DEFAULT_PS
If this causes your applications to misbehave you should fix your
applications instead -- they need to register their network
- latency requirement, see Documentation/power/pm_qos_interface.txt.
+ latency requirement, see Documentation/power/pm_qos_interface.rst.
config CFG80211_DEBUGFS
bool "cfg80211 DebugFS entries"
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 45d9afcff6d5..32b3c719fdfc 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1410,10 +1410,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
}
break;
case NETDEV_PRE_UP:
- if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)) &&
- !(wdev->iftype == NL80211_IFTYPE_AP_VLAN &&
- rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP &&
- wdev->use_4addr))
+ if (!cfg80211_iftype_allowed(wdev->wiphy, wdev->iftype,
+ wdev->use_4addr, 0))
return notifier_from_errno(-EOPNOTSUPP);
if (rfkill_blocked(rdev->rfkill))
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fc83dd179c1a..fd05ae1437a9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3484,9 +3484,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
return err;
}
- if (!(rdev->wiphy.interface_modes & (1 << type)) &&
- !(type == NL80211_IFTYPE_AP_VLAN && params.use_4addr &&
- rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP))
+ if (!cfg80211_iftype_allowed(&rdev->wiphy, type, params.use_4addr, 0))
return -EOPNOTSUPP;
err = nl80211_parse_mon_options(rdev, type, info, &params);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 1c39d6a2e850..d0e35b7b9e35 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1697,7 +1697,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
num_interfaces += params->iftype_num[iftype];
if (params->iftype_num[iftype] > 0 &&
- !(wiphy->software_iftypes & BIT(iftype)))
+ !cfg80211_iftype_allowed(wiphy, iftype, 0, 1))
used_iftypes |= BIT(iftype);
}
@@ -1719,7 +1719,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
return -ENOMEM;
for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
- if (wiphy->software_iftypes & BIT(iftype))
+ if (cfg80211_iftype_allowed(wiphy, iftype, 0, 1))
continue;
for (j = 0; j < c->n_limits; j++) {
all_iftypes |= limits[j].types;
@@ -2072,3 +2072,26 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
return max_vht_nss;
}
EXPORT_SYMBOL(ieee80211_get_vht_max_nss);
+
+bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
+ bool is_4addr, u8 check_swif)
+
+{
+ bool is_vlan = iftype == NL80211_IFTYPE_AP_VLAN;
+
+ switch (check_swif) {
+ case 0:
+ if (is_vlan && is_4addr)
+ return wiphy->flags & WIPHY_FLAG_4ADDR_AP;
+ return wiphy->interface_modes & BIT(iftype);
+ case 1:
+ if (!(wiphy->software_iftypes & BIT(iftype)) && is_vlan)
+ return wiphy->flags & WIPHY_FLAG_4ADDR_AP;
+ return wiphy->software_iftypes & BIT(iftype);
+ default:
+ break;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(cfg80211_iftype_allowed);
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 20c91f02d3d8..83de74ca729a 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -87,21 +87,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
struct netdev_bpf bpf;
int err = 0;
+ ASSERT_RTNL();
+
force_zc = flags & XDP_ZEROCOPY;
force_copy = flags & XDP_COPY;
if (force_zc && force_copy)
return -EINVAL;
- rtnl_lock();
- if (xdp_get_umem_from_qid(dev, queue_id)) {
- err = -EBUSY;
- goto out_rtnl_unlock;
- }
+ if (xdp_get_umem_from_qid(dev, queue_id))
+ return -EBUSY;
err = xdp_reg_umem_at_qid(dev, umem, queue_id);
if (err)
- goto out_rtnl_unlock;
+ return err;
umem->dev = dev;
umem->queue_id = queue_id;
@@ -110,7 +109,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
if (force_copy)
/* For copy-mode, we are done. */
- goto out_rtnl_unlock;
+ return 0;
if (!dev->netdev_ops->ndo_bpf ||
!dev->netdev_ops->ndo_xsk_async_xmit) {
@@ -125,7 +124,6 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
err = dev->netdev_ops->ndo_bpf(dev, &bpf);
if (err)
goto err_unreg_umem;
- rtnl_unlock();
umem->zc = true;
return 0;
@@ -135,8 +133,6 @@ err_unreg_umem:
err = 0; /* fallback to copy mode */
if (err)
xdp_clear_umem_at_qid(dev, queue_id);
-out_rtnl_unlock:
- rtnl_unlock();
return err;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index d4d6f10aa936..59b57d708697 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -240,6 +240,9 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
mutex_lock(&xs->mutex);
+ if (xs->queue_id >= xs->dev->real_num_tx_queues)
+ goto out;
+
while (xskq_peek_desc(xs->tx, &desc)) {
char *buffer;
u64 addr;
@@ -250,12 +253,6 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out;
}
- if (xskq_reserve_addr(xs->umem->cq))
- goto out;
-
- if (xs->queue_id >= xs->dev->real_num_tx_queues)
- goto out;
-
len = desc.len;
skb = sock_alloc_send_skb(sk, len, 1, &err);
if (unlikely(!skb)) {
@@ -267,7 +264,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
addr = desc.addr;
buffer = xdp_umem_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err)) {
+ if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) {
kfree_skb(skb);
goto out;
}
@@ -433,6 +430,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY))
return -EINVAL;
+ rtnl_lock();
mutex_lock(&xs->mutex);
if (xs->state != XSK_READY) {
err = -EBUSY;
@@ -518,6 +516,7 @@ out_unlock:
xs->state = XSK_BOUND;
out_release:
mutex_unlock(&xs->mutex);
+ rtnl_unlock();
return err;
}