aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/atm/signaling.c2
-rw-r--r--net/atm/svc.c2
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/ax25/ax25_in.c2
-rw-r--r--net/batman-adv/bat_v.c1
-rw-r--r--net/batman-adv/bat_v_ogm.c34
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c2
-rw-r--r--net/batman-adv/soft-interface.c5
-rw-r--r--net/batman-adv/types.h3
-rw-r--r--net/bluetooth/af_bluetooth.c4
-rw-r--r--net/bluetooth/hci_conn.c8
-rw-r--r--net/bluetooth/hci_core.c13
-rw-r--r--net/bluetooth/smp.c6
-rw-r--r--net/bpf/test_run.c9
-rw-r--r--net/bridge/br_fdb.c157
-rw-r--r--net/bridge/br_input.c7
-rw-r--r--net/bridge/br_private.h24
-rw-r--r--net/bridge/br_switchdev.c12
-rw-r--r--net/caif/Kconfig10
-rw-r--r--net/core/dev.c408
-rw-r--r--net/core/devlink.c319
-rw-r--r--net/core/fib_notifier.c95
-rw-r--r--net/core/fib_rules.c23
-rw-r--r--net/core/filter.c19
-rw-r--r--net/core/flow_dissector.c155
-rw-r--r--net/core/gen_estimator.c4
-rw-r--r--net/core/gen_stats.c12
-rw-r--r--net/core/neighbour.c8
-rw-r--r--net/core/net-procfs.c4
-rw-r--r--net/core/pktgen.c1
-rw-r--r--net/core/rtnetlink.c206
-rw-r--r--net/core/sock.c16
-rw-r--r--net/core/xdp.c2
-rw-r--r--net/dccp/proto.c2
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/decnet/dn_nsp_in.c2
-rw-r--r--net/dsa/dsa.c93
-rw-r--r--net/dsa/dsa2.c384
-rw-r--r--net/dsa/dsa_priv.h23
-rw-r--r--net/dsa/port.c13
-rw-r--r--net/dsa/slave.c25
-rw-r--r--net/dsa/switch.c4
-rw-r--r--net/dsa/tag_8021q.c11
-rw-r--r--net/ethernet/eth.c7
-rw-r--r--net/ieee802154/nl802154.c39
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/fib_notifier.c13
-rw-r--r--net/ipv4/fib_rules.c5
-rw-r--r--net/ipv4/fib_trie.c44
-rw-r--r--net/ipv4/icmp.c14
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_diag.c15
-rw-r--r--net/ipv4/inetpeer.c12
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv4/ip_tunnel_core.c382
-rw-r--r--net/ipv4/ipconfig.c10
-rw-r--r--net/ipv4/ipmr.c13
-rw-r--r--net/ipv4/ipmr_base.c30
-rw-r--r--net/ipv4/netfilter/nf_socket_ipv4.c10
-rw-r--r--net/ipv4/route.c5
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/tcp.c32
-rw-r--r--net/ipv4/tcp_diag.c4
-rw-r--r--net/ipv4/tcp_fastopen.c5
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/ipv4/tcp_ipv4.c8
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv6/addrconf.c7
-rw-r--r--net/ipv6/fib6_notifier.c11
-rw-r--r--net/ipv6/fib6_rules.c5
-rw-r--r--net/ipv6/icmp.c22
-rw-r--r--net/ipv6/ip6_fib.c50
-rw-r--r--net/ipv6/ip6_input.c3
-rw-r--r--net/ipv6/ip6mr.c13
-rw-r--r--net/ipv6/netfilter/nf_tproxy_ipv6.c2
-rw-r--r--net/ipv6/route.c9
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/llc/af_llc.c4
-rw-r--r--net/mac80211/agg-tx.c9
-rw-r--r--net/mac80211/cfg.c2
-rw-r--r--net/mac80211/ibss.c9
-rw-r--r--net/mac80211/mlme.c103
-rw-r--r--net/mac80211/rc80211_minstrel.c48
-rw-r--r--net/mac80211/rc80211_minstrel.h57
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c8
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c73
-rw-r--r--net/mac80211/rc80211_minstrel_ht.h2
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c8
-rw-r--r--net/mac80211/tx.c64
-rw-r--r--net/netfilter/core.c20
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c26
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c18
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c41
-rw-r--r--net/netfilter/ipset/ip_set_core.c212
-rw-r--r--net/netfilter/ipset/ip_set_getport.c28
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h4
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmac.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c8
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c24
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c6
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c24
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c24
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c28
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c24
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c28
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c47
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ovf.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c18
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_ecache.c23
-rw-r--r--net/netfilter/nf_conntrack_extend.c21
-rw-r--r--net/netfilter/nf_conntrack_netlink.c76
-rw-r--r--net/netfilter/nf_conntrack_proto_icmp.c6
-rw-r--r--net/netfilter/nf_tables_api.c572
-rw-r--r--net/netfilter/nf_tables_offload.c188
-rw-r--r--net/netfilter/nft_chain_filter.c45
-rw-r--r--net/netfilter/xt_HMARK.c6
-rw-r--r--net/netlink/genetlink.c303
-rw-r--r--net/nfc/netlink.c17
-rw-r--r--net/openvswitch/actions.c2
-rw-r--r--net/openvswitch/conntrack.c21
-rw-r--r--net/openvswitch/datapath.c65
-rw-r--r--net/openvswitch/flow.c20
-rw-r--r--net/openvswitch/flow.h10
-rw-r--r--net/openvswitch/flow_netlink.c87
-rw-r--r--net/openvswitch/flow_table.c381
-rw-r--r--net/openvswitch/flow_table.h19
-rw-r--r--net/openvswitch/vport.c5
-rw-r--r--net/packet/af_packet.c12
-rw-r--r--net/qrtr/tun.c6
-rw-r--r--net/rds/ib.c10
-rw-r--r--net/rds/ib.h15
-rw-r--r--net/rds/ib_cm.c167
-rw-r--r--net/rds/ib_recv.c13
-rw-r--r--net/rds/ib_send.c19
-rw-r--r--net/rose/af_rose.c4
-rw-r--r--net/rxrpc/peer_object.c2
-rw-r--r--net/sched/act_api.c57
-rw-r--r--net/sched/act_bpf.c5
-rw-r--r--net/sched/act_connmark.c4
-rw-r--r--net/sched/act_csum.c10
-rw-r--r--net/sched/act_ct.c16
-rw-r--r--net/sched/act_ctinfo.c4
-rw-r--r--net/sched/act_gact.c21
-rw-r--r--net/sched/act_ife.c5
-rw-r--r--net/sched/act_ipt.c12
-rw-r--r--net/sched/act_mirred.c19
-rw-r--r--net/sched/act_mpls.c5
-rw-r--r--net/sched/act_nat.c8
-rw-r--r--net/sched/act_pedit.c5
-rw-r--r--net/sched/act_police.c14
-rw-r--r--net/sched/act_sample.c4
-rw-r--r--net/sched/act_simple.c7
-rw-r--r--net/sched/act_skbedit.c4
-rw-r--r--net/sched/act_skbmod.c4
-rw-r--r--net/sched/act_tunnel_key.c9
-rw-r--r--net/sched/act_vlan.c16
-rw-r--r--net/sched/em_meta.c4
-rw-r--r--net/sched/sch_fq.c3
-rw-r--r--net/sched/sch_fq_codel.c1
-rw-r--r--net/sched/sch_generic.c18
-rw-r--r--net/sctp/associola.c60
-rw-r--r--net/sctp/chunk.c40
-rw-r--r--net/sctp/diag.c4
-rw-r--r--net/sctp/endpointola.c2
-rw-r--r--net/sctp/protocol.c6
-rw-r--r--net/sctp/sm_sideeffect.c5
-rw-r--r--net/sctp/socket.c151
-rw-r--r--net/sctp/sysctl.c22
-rw-r--r--net/sctp/ulpevent.c57
-rw-r--r--net/smc/af_smc.c4
-rw-r--r--net/smc/smc.h1
-rw-r--r--net/smc/smc_cdc.c4
-rw-r--r--net/smc/smc_close.c70
-rw-r--r--net/smc/smc_close.h2
-rw-r--r--net/smc/smc_core.c238
-rw-r--r--net/smc/smc_core.h9
-rw-r--r--net/smc/smc_ib.c15
-rw-r--r--net/smc/smc_ib.h1
-rw-r--r--net/smc/smc_ism.c5
-rw-r--r--net/smc/smc_llc.c2
-rw-r--r--net/smc/smc_pnet.c5
-rw-r--r--net/smc/smc_rx.c10
-rw-r--r--net/smc/smc_tx.c26
-rw-r--r--net/smc/smc_wr.c10
-rw-r--r--net/tipc/Kconfig15
-rw-r--r--net/tipc/Makefile1
-rw-r--r--net/tipc/bcast.c2
-rw-r--r--net/tipc/bearer.c49
-rw-r--r--net/tipc/bearer.h6
-rw-r--r--net/tipc/core.c30
-rw-r--r--net/tipc/core.h14
-rw-r--r--net/tipc/crypto.c1986
-rw-r--r--net/tipc/crypto.h167
-rw-r--r--net/tipc/discover.c4
-rw-r--r--net/tipc/link.c107
-rw-r--r--net/tipc/link.h1
-rw-r--r--net/tipc/msg.c221
-rw-r--r--net/tipc/msg.h77
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/netlink.c39
-rw-r--r--net/tipc/netlink.h1
-rw-r--r--net/tipc/netlink_compat.c28
-rw-r--r--net/tipc/node.c488
-rw-r--r--net/tipc/node.h25
-rw-r--r--net/tipc/socket.c129
-rw-r--r--net/tipc/sysctl.c11
-rw-r--r--net/tipc/udp_media.c7
-rw-r--r--net/tls/Kconfig10
-rw-r--r--net/tls/Makefile5
-rw-r--r--net/tls/tls_device.c46
-rw-r--r--net/tls/tls_main.c172
-rw-r--r--net/tls/tls_proc.c47
-rw-r--r--net/tls/tls_sw.c18
-rw-r--r--net/tls/tls_toe.c139
-rw-r--r--net/tls/trace.c10
-rw-r--r--net/tls/trace.h202
-rw-r--r--net/unix/af_unix.c6
-rw-r--r--net/vmw_vsock/af_vsock.c8
-rw-r--r--net/vmw_vsock/hyperv_transport.c24
-rw-r--r--net/vmw_vsock/virtio_transport_common.c57
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/wireless/nl80211.c17
-rw-r--r--net/wireless/reg.h2
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/xdp/xsk.c33
234 files changed, 8667 insertions, 2614 deletions
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 6c11cdf4dd4c..fbd0c5e7b299 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -109,7 +109,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
dev_kfree_skb(skb);
goto as_indicate_complete;
}
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
skb_queue_tail(&sk->sk_receive_queue, skb);
pr_debug("waking sk_sleep(sk) 0x%p\n", sk_sleep(sk));
sk->sk_state_change(sk);
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 908cbb8654f5..ba144d035e3d 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -381,7 +381,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
msg->pvc.sap_addr.vpi,
msg->pvc.sap_addr.vci);
dev_kfree_skb(skb);
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
if (error) {
sigd_enq2(NULL, as_reject, old_vcc, NULL, NULL,
&old_vcc->qos, error);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index bb222b882b67..324306d6fde0 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1384,7 +1384,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
/* Now attach up the new socket */
kfree_skb(skb);
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
newsock->state = SS_CONNECTED;
out:
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index dcdbaeeb2358..cd6afe895db9 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -356,7 +356,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
make->sk_state = TCP_ESTABLISHED;
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
bh_unlock_sock(sk);
} else {
if (!mine)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 64054edc2e3c..4ff6cf1ecae7 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1085,7 +1085,6 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface)
hard_iface->bat_v.aggr_len = 0;
skb_queue_head_init(&hard_iface->bat_v.aggr_list);
- spin_lock_init(&hard_iface->bat_v.aggr_list_lock);
INIT_DELAYED_WORK(&hard_iface->bat_v.aggr_wq,
batadv_v_ogm_aggr_work);
}
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 8033f24f506c..714ce56cfcc8 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -152,7 +152,7 @@ static unsigned int batadv_v_ogm_len(struct sk_buff *skb)
* @skb: the OGM to check
* @hard_iface: the interface to use to send the OGM
*
- * Caller needs to hold the hard_iface->bat_v.aggr_list_lock.
+ * Caller needs to hold the hard_iface->bat_v.aggr_list.lock.
*
* Return: True, if the given OGMv2 packet still fits, false otherwise.
*/
@@ -163,7 +163,7 @@ static bool batadv_v_ogm_queue_left(struct sk_buff *skb,
BATADV_MAX_AGGREGATION_BYTES);
unsigned int ogm_len = batadv_v_ogm_len(skb);
- lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock);
+ lockdep_assert_held(&hard_iface->bat_v.aggr_list.lock);
return hard_iface->bat_v.aggr_len + ogm_len <= max;
}
@@ -174,17 +174,13 @@ static bool batadv_v_ogm_queue_left(struct sk_buff *skb,
*
* Empties the OGMv2 aggregation queue and frees all the skbs it contained.
*
- * Caller needs to hold the hard_iface->bat_v.aggr_list_lock.
+ * Caller needs to hold the hard_iface->bat_v.aggr_list.lock.
*/
static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface)
{
- struct sk_buff *skb;
-
- lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock);
-
- while ((skb = skb_dequeue(&hard_iface->bat_v.aggr_list)))
- kfree_skb(skb);
+ lockdep_assert_held(&hard_iface->bat_v.aggr_list.lock);
+ __skb_queue_purge(&hard_iface->bat_v.aggr_list);
hard_iface->bat_v.aggr_len = 0;
}
@@ -197,7 +193,7 @@ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface)
*
* The aggregation queue is empty after this call.
*
- * Caller needs to hold the hard_iface->bat_v.aggr_list_lock.
+ * Caller needs to hold the hard_iface->bat_v.aggr_list.lock.
*/
static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface)
{
@@ -206,7 +202,7 @@ static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface)
unsigned int ogm_len;
struct sk_buff *skb;
- lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock);
+ lockdep_assert_held(&hard_iface->bat_v.aggr_list.lock);
if (!aggr_len)
return;
@@ -220,7 +216,7 @@ static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface)
skb_reserve(skb_aggr, ETH_HLEN + NET_IP_ALIGN);
skb_reset_network_header(skb_aggr);
- while ((skb = skb_dequeue(&hard_iface->bat_v.aggr_list))) {
+ while ((skb = __skb_dequeue(&hard_iface->bat_v.aggr_list))) {
hard_iface->bat_v.aggr_len -= batadv_v_ogm_len(skb);
ogm_len = batadv_v_ogm_len(skb);
@@ -247,13 +243,13 @@ static void batadv_v_ogm_queue_on_if(struct sk_buff *skb,
return;
}
- spin_lock_bh(&hard_iface->bat_v.aggr_list_lock);
+ spin_lock_bh(&hard_iface->bat_v.aggr_list.lock);
if (!batadv_v_ogm_queue_left(skb, hard_iface))
batadv_v_ogm_aggr_send(hard_iface);
hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb);
- skb_queue_tail(&hard_iface->bat_v.aggr_list, skb);
- spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock);
+ __skb_queue_tail(&hard_iface->bat_v.aggr_list, skb);
+ spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock);
}
/**
@@ -392,9 +388,9 @@ void batadv_v_ogm_aggr_work(struct work_struct *work)
batv = container_of(work, struct batadv_hard_iface_bat_v, aggr_wq.work);
hard_iface = container_of(batv, struct batadv_hard_iface, bat_v);
- spin_lock_bh(&hard_iface->bat_v.aggr_list_lock);
+ spin_lock_bh(&hard_iface->bat_v.aggr_list.lock);
batadv_v_ogm_aggr_send(hard_iface);
- spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock);
+ spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock);
batadv_v_ogm_start_queue_timer(hard_iface);
}
@@ -425,9 +421,9 @@ void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
{
cancel_delayed_work_sync(&hard_iface->bat_v.aggr_wq);
- spin_lock_bh(&hard_iface->bat_v.aggr_list_lock);
+ spin_lock_bh(&hard_iface->bat_v.aggr_list.lock);
batadv_v_ogm_aggr_list_free(hard_iface);
- spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock);
+ spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock);
}
/**
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 6967f2e4c3f4..c7b340ddd0e7 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2019.4"
+#define BATADV_SOURCE_VERSION "2019.5"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 1d5bdf3a4b65..f9ec8e7507b6 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1421,7 +1421,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
if (*orig)
return BATADV_FORW_SINGLE;
- /* fall through */
+ fallthrough;
case 0:
return BATADV_FORW_NONE;
default:
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 5ee8e9a100f9..832e156c519e 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -22,7 +22,6 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
-#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/percpu.h>
@@ -230,7 +229,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
break;
}
- /* fall through */
+ fallthrough;
case ETH_P_BATMAN:
goto dropped;
}
@@ -455,7 +454,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN))
break;
- /* fall through */
+ fallthrough;
case ETH_P_BATMAN:
goto dropped;
}
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 4d7f1baee7b7..47718a82eaf2 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -130,9 +130,6 @@ struct batadv_hard_iface_bat_v {
/** @aggr_len: size of the OGM aggregate (excluding ethernet header) */
unsigned int aggr_len;
- /** @aggr_list_lock: protects aggr_list */
- spinlock_t aggr_list_lock;
-
/**
* @throughput_override: throughput override to disable link
* auto-detection
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 5f508c50649d..3fd124927d4d 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -173,7 +173,7 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
else
release_sock(sk);
- parent->sk_ack_backlog++;
+ sk_acceptq_added(parent);
}
EXPORT_SYMBOL(bt_accept_enqueue);
@@ -185,7 +185,7 @@ void bt_accept_unlink(struct sock *sk)
BT_DBG("sk %p state %d", sk, sk->sk_state);
list_del_init(&bt_sk(sk)->accept_q);
- bt_sk(sk)->parent->sk_ack_backlog--;
+ sk_acceptq_removed(bt_sk(sk)->parent);
bt_sk(sk)->parent = NULL;
sock_put(sk);
}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index ad5b0ac1f9ce..7ff92dd4c53c 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -934,6 +934,14 @@ static void hci_req_directed_advertising(struct hci_request *req,
return;
memset(&cp, 0, sizeof(cp));
+
+ /* Some controllers might reject command if intervals are not
+ * within range for undirected advertising.
+ * BCM20702A0 is known to be affected by this.
+ */
+ cp.min_interval = cpu_to_le16(0x0020);
+ cp.max_interval = cpu_to_le16(0x0020);
+
cp.type = LE_ADV_DIRECT_IND;
cp.own_address_type = own_addr_type;
cp.direct_addr_type = conn->dst_type;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 04bc79359a17..0cc9ce917222 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -842,8 +842,8 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt)
if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) {
struct hci_cp_le_write_def_data_len cp;
- cp.tx_len = hdev->le_max_tx_len;
- cp.tx_time = hdev->le_max_tx_time;
+ cp.tx_len = cpu_to_le16(hdev->le_max_tx_len);
+ cp.tx_time = cpu_to_le16(hdev->le_max_tx_time);
hci_req_add(req, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp);
}
@@ -4440,7 +4440,14 @@ static void hci_rx_work(struct work_struct *work)
hci_send_to_sock(hdev, skb);
}
- if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
+ /* If the device has been opened in HCI_USER_CHANNEL,
+ * the userspace has exclusive access to device.
+ * When device is HCI_INIT, we still need to process
+ * the data packets to the driver in order
+ * to complete its setup().
+ */
+ if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+ !test_bit(HCI_INIT, &hdev->flags)) {
kfree_skb(skb);
continue;
}
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 26e8cfad22b8..6b42be4b5861 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -502,15 +502,12 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16],
const bdaddr_t *bdaddr)
{
struct l2cap_chan *chan = hdev->smp_data;
- struct smp_dev *smp;
u8 hash[3];
int err;
if (!chan || !chan->data)
return false;
- smp = chan->data;
-
BT_DBG("RPA %pMR IRK %*phN", bdaddr, 16, irk);
err = smp_ah(irk, &bdaddr->b[3], hash);
@@ -523,14 +520,11 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16],
int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa)
{
struct l2cap_chan *chan = hdev->smp_data;
- struct smp_dev *smp;
int err;
if (!chan || !chan->data)
return -EOPNOTSUPP;
- smp = chan->data;
-
get_random_bytes(&rpa->b[3], 3);
rpa->b[5] &= 0x3f; /* Clear two most significant bits */
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 1153bbcdff72..0be4497cb832 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -218,10 +218,18 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) +
FIELD_SIZEOF(struct __sk_buff, cb),
+ offsetof(struct __sk_buff, tstamp)))
+ return -EINVAL;
+
+ /* tstamp is allowed */
+
+ if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) +
+ FIELD_SIZEOF(struct __sk_buff, tstamp),
sizeof(struct __sk_buff)))
return -EINVAL;
skb->priority = __skb->priority;
+ skb->tstamp = __skb->tstamp;
memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
return 0;
@@ -235,6 +243,7 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
return;
__skb->priority = skb->priority;
+ __skb->tstamp = skb->tstamp;
memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index b1d3248c0252..4877a0db16c6 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -75,8 +75,9 @@ static inline unsigned long hold_time(const struct net_bridge *br)
static inline int has_expired(const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb)
{
- return !fdb->is_static && !fdb->added_by_external_learn &&
- time_before_eq(fdb->updated + hold_time(br), jiffies);
+ return !test_bit(BR_FDB_STATIC, &fdb->flags) &&
+ !test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags) &&
+ time_before_eq(fdb->updated + hold_time(br), jiffies);
}
static void fdb_rcu_free(struct rcu_head *head)
@@ -197,7 +198,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
{
trace_fdb_delete(br, f);
- if (f->is_static)
+ if (test_bit(BR_FDB_STATIC, &f->flags))
fdb_del_hw_addr(br, f->key.addr.addr);
hlist_del_init_rcu(&f->fdb_node);
@@ -224,7 +225,7 @@ static void fdb_delete_local(struct net_bridge *br,
if (op != p && ether_addr_equal(op->dev->dev_addr, addr) &&
(!vid || br_vlan_find(vg, vid))) {
f->dst = op;
- f->added_by_user = 0;
+ clear_bit(BR_FDB_ADDED_BY_USER, &f->flags);
return;
}
}
@@ -235,7 +236,7 @@ static void fdb_delete_local(struct net_bridge *br,
if (p && ether_addr_equal(br->dev->dev_addr, addr) &&
(!vid || (v && br_vlan_should_use(v)))) {
f->dst = NULL;
- f->added_by_user = 0;
+ clear_bit(BR_FDB_ADDED_BY_USER, &f->flags);
return;
}
@@ -250,7 +251,8 @@ void br_fdb_find_delete_local(struct net_bridge *br,
spin_lock_bh(&br->hash_lock);
f = br_fdb_find(br, addr, vid);
- if (f && f->is_local && !f->added_by_user && f->dst == p)
+ if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
+ !test_bit(BR_FDB_ADDED_BY_USER, &f->flags) && f->dst == p)
fdb_delete_local(br, p, f);
spin_unlock_bh(&br->hash_lock);
}
@@ -265,7 +267,8 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
spin_lock_bh(&br->hash_lock);
vg = nbp_vlan_group(p);
hlist_for_each_entry(f, &br->fdb_list, fdb_node) {
- if (f->dst == p && f->is_local && !f->added_by_user) {
+ if (f->dst == p && test_bit(BR_FDB_LOCAL, &f->flags) &&
+ !test_bit(BR_FDB_ADDED_BY_USER, &f->flags)) {
/* delete old one */
fdb_delete_local(br, p, f);
@@ -306,7 +309,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
/* If old entry was unassociated with any port, then delete it. */
f = br_fdb_find(br, br->dev->dev_addr, 0);
- if (f && f->is_local && !f->dst && !f->added_by_user)
+ if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
+ !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, 0);
@@ -321,7 +325,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
if (!br_vlan_should_use(v))
continue;
f = br_fdb_find(br, br->dev->dev_addr, v->vid);
- if (f && f->is_local && !f->dst && !f->added_by_user)
+ if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
+ !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, v->vid);
}
@@ -346,7 +351,8 @@ void br_fdb_cleanup(struct work_struct *work)
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
unsigned long this_timer;
- if (f->is_static || f->added_by_external_learn)
+ if (test_bit(BR_FDB_STATIC, &f->flags) ||
+ test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags))
continue;
this_timer = f->updated + delay;
if (time_after(this_timer, now)) {
@@ -373,7 +379,7 @@ void br_fdb_flush(struct net_bridge *br)
spin_lock_bh(&br->hash_lock);
hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
- if (!f->is_static)
+ if (!test_bit(BR_FDB_STATIC, &f->flags))
fdb_delete(br, f, true);
}
spin_unlock_bh(&br->hash_lock);
@@ -397,10 +403,11 @@ void br_fdb_delete_by_port(struct net_bridge *br,
continue;
if (!do_all)
- if (f->is_static || (vid && f->key.vlan_id != vid))
+ if (test_bit(BR_FDB_STATIC, &f->flags) ||
+ (vid && f->key.vlan_id != vid))
continue;
- if (f->is_local)
+ if (test_bit(BR_FDB_LOCAL, &f->flags))
fdb_delete_local(br, p, f);
else
fdb_delete(br, f, true);
@@ -469,8 +476,8 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
fe->port_no = f->dst->port_no;
fe->port_hi = f->dst->port_no >> 8;
- fe->is_local = f->is_local;
- if (!f->is_static)
+ fe->is_local = test_bit(BR_FDB_LOCAL, &f->flags);
+ if (!test_bit(BR_FDB_STATIC, &f->flags))
fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
++fe;
++num;
@@ -484,8 +491,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
struct net_bridge_port *source,
const unsigned char *addr,
__u16 vid,
- unsigned char is_local,
- unsigned char is_static)
+ unsigned long flags)
{
struct net_bridge_fdb_entry *fdb;
@@ -494,12 +500,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
fdb->dst = source;
fdb->key.vlan_id = vid;
- fdb->is_local = is_local;
- fdb->is_static = is_static;
- fdb->added_by_user = 0;
- fdb->added_by_external_learn = 0;
- fdb->offloaded = 0;
- fdb->is_sticky = 0;
+ fdb->flags = flags;
fdb->updated = fdb->used = jiffies;
if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
&fdb->rhnode,
@@ -526,14 +527,15 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
/* it is okay to have multiple ports with same
* address, just use the first one.
*/
- if (fdb->is_local)
+ if (test_bit(BR_FDB_LOCAL, &fdb->flags))
return 0;
br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
source ? source->dev->name : br->dev->name, addr, vid);
fdb_delete(br, fdb, true);
}
- fdb = fdb_create(br, source, addr, vid, 1, 1);
+ fdb = fdb_create(br, source, addr, vid,
+ BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC));
if (!fdb)
return -ENOMEM;
@@ -555,7 +557,7 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
}
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid, bool added_by_user)
+ const unsigned char *addr, u16 vid, unsigned long flags)
{
struct net_bridge_fdb_entry *fdb;
bool fdb_modified = false;
@@ -564,15 +566,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
if (hold_time(br) == 0)
return;
- /* ignore packets unless we are using this port */
- if (!(source->state == BR_STATE_LEARNING ||
- source->state == BR_STATE_FORWARDING))
- return;
-
fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
if (likely(fdb)) {
/* attempt to update an entry for a local interface */
- if (unlikely(fdb->is_local)) {
+ if (unlikely(test_bit(BR_FDB_LOCAL, &fdb->flags))) {
if (net_ratelimit())
br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n",
source->dev->name, addr, vid);
@@ -580,30 +577,30 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
unsigned long now = jiffies;
/* fastpath: update of existing entry */
- if (unlikely(source != fdb->dst && !fdb->is_sticky)) {
+ if (unlikely(source != fdb->dst &&
+ !test_bit(BR_FDB_STICKY, &fdb->flags))) {
fdb->dst = source;
fdb_modified = true;
/* Take over HW learned entry */
- if (unlikely(fdb->added_by_external_learn))
- fdb->added_by_external_learn = 0;
+ if (unlikely(test_bit(BR_FDB_ADDED_BY_EXT_LEARN,
+ &fdb->flags)))
+ clear_bit(BR_FDB_ADDED_BY_EXT_LEARN,
+ &fdb->flags);
}
if (now != fdb->updated)
fdb->updated = now;
- if (unlikely(added_by_user))
- fdb->added_by_user = 1;
+ if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags)))
+ set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
if (unlikely(fdb_modified)) {
- trace_br_fdb_update(br, source, addr, vid, added_by_user);
+ trace_br_fdb_update(br, source, addr, vid, flags);
fdb_notify(br, fdb, RTM_NEWNEIGH, true);
}
}
} else {
spin_lock(&br->hash_lock);
- fdb = fdb_create(br, source, addr, vid, 0, 0);
+ fdb = fdb_create(br, source, addr, vid, flags);
if (fdb) {
- if (unlikely(added_by_user))
- fdb->added_by_user = 1;
- trace_br_fdb_update(br, source, addr, vid,
- added_by_user);
+ trace_br_fdb_update(br, source, addr, vid, flags);
fdb_notify(br, fdb, RTM_NEWNEIGH, true);
}
/* else we lose race and someone else inserts
@@ -616,9 +613,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
static int fdb_to_nud(const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb)
{
- if (fdb->is_local)
+ if (test_bit(BR_FDB_LOCAL, &fdb->flags))
return NUD_PERMANENT;
- else if (fdb->is_static)
+ else if (test_bit(BR_FDB_STATIC, &fdb->flags))
return NUD_NOARP;
else if (has_expired(br, fdb))
return NUD_STALE;
@@ -648,11 +645,11 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex;
ndm->ndm_state = fdb_to_nud(br, fdb);
- if (fdb->offloaded)
+ if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
ndm->ndm_flags |= NTF_OFFLOADED;
- if (fdb->added_by_external_learn)
+ if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
ndm->ndm_flags |= NTF_EXT_LEARNED;
- if (fdb->is_sticky)
+ if (test_bit(BR_FDB_STICKY, &fdb->flags))
ndm->ndm_flags |= NTF_STICKY;
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
@@ -799,7 +796,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
const u8 *addr, u16 state, u16 flags, u16 vid,
u8 ndm_flags)
{
- u8 is_sticky = !!(ndm_flags & NTF_STICKY);
+ bool is_sticky = !!(ndm_flags & NTF_STICKY);
struct net_bridge_fdb_entry *fdb;
bool modified = false;
@@ -823,7 +820,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (!(flags & NLM_F_CREATE))
return -ENOENT;
- fdb = fdb_create(br, source, addr, vid, 0, 0);
+ fdb = fdb_create(br, source, addr, vid, 0);
if (!fdb)
return -ENOMEM;
@@ -840,34 +837,28 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (fdb_to_nud(br, fdb) != state) {
if (state & NUD_PERMANENT) {
- fdb->is_local = 1;
- if (!fdb->is_static) {
- fdb->is_static = 1;
+ set_bit(BR_FDB_LOCAL, &fdb->flags);
+ if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags))
fdb_add_hw_addr(br, addr);
- }
} else if (state & NUD_NOARP) {
- fdb->is_local = 0;
- if (!fdb->is_static) {
- fdb->is_static = 1;
+ clear_bit(BR_FDB_LOCAL, &fdb->flags);
+ if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags))
fdb_add_hw_addr(br, addr);
- }
} else {
- fdb->is_local = 0;
- if (fdb->is_static) {
- fdb->is_static = 0;
+ clear_bit(BR_FDB_LOCAL, &fdb->flags);
+ if (test_and_clear_bit(BR_FDB_STATIC, &fdb->flags))
fdb_del_hw_addr(br, addr);
- }
}
modified = true;
}
- if (is_sticky != fdb->is_sticky) {
- fdb->is_sticky = is_sticky;
+ if (is_sticky != test_bit(BR_FDB_STICKY, &fdb->flags)) {
+ change_bit(BR_FDB_STICKY, &fdb->flags);
modified = true;
}
- fdb->added_by_user = 1;
+ set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
fdb->used = jiffies;
if (modified) {
@@ -890,9 +881,12 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
br->dev->name);
return -EINVAL;
}
+ if (!nbp_state_should_learn(p))
+ return 0;
+
local_bh_disable();
rcu_read_lock();
- br_fdb_update(br, p, addr, vid, true);
+ br_fdb_update(br, p, addr, vid, BIT(BR_FDB_ADDED_BY_USER));
rcu_read_unlock();
local_bh_enable();
} else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
@@ -1064,7 +1058,7 @@ int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p)
rcu_read_lock();
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
/* We only care for static entries */
- if (!f->is_static)
+ if (!test_bit(BR_FDB_STATIC, &f->flags))
continue;
err = dev_uc_add(p->dev, f->key.addr.addr);
if (err)
@@ -1078,7 +1072,7 @@ done:
rollback:
hlist_for_each_entry_rcu(tmp, &br->fdb_list, fdb_node) {
/* We only care for static entries */
- if (!tmp->is_static)
+ if (!test_bit(BR_FDB_STATIC, &tmp->flags))
continue;
if (tmp == f)
break;
@@ -1097,7 +1091,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
rcu_read_lock();
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
/* We only care for static entries */
- if (!f->is_static)
+ if (!test_bit(BR_FDB_STATIC, &f->flags))
continue;
dev_uc_del(p->dev, f->key.addr.addr);
@@ -1119,14 +1113,15 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
fdb = br_fdb_find(br, addr, vid);
if (!fdb) {
- fdb = fdb_create(br, p, addr, vid, 0, 0);
+ unsigned long flags = BIT(BR_FDB_ADDED_BY_EXT_LEARN);
+
+ if (swdev_notify)
+ flags |= BIT(BR_FDB_ADDED_BY_USER);
+ fdb = fdb_create(br, p, addr, vid, flags);
if (!fdb) {
err = -ENOMEM;
goto err_unlock;
}
- if (swdev_notify)
- fdb->added_by_user = 1;
- fdb->added_by_external_learn = 1;
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
} else {
fdb->updated = jiffies;
@@ -1136,17 +1131,17 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
modified = true;
}
- if (fdb->added_by_external_learn) {
+ if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) {
/* Refresh entry */
fdb->used = jiffies;
- } else if (!fdb->added_by_user) {
+ } else if (!test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags)) {
/* Take over SW learned entry */
- fdb->added_by_external_learn = 1;
+ set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags);
modified = true;
}
if (swdev_notify)
- fdb->added_by_user = 1;
+ set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
if (modified)
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
@@ -1168,7 +1163,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
spin_lock_bh(&br->hash_lock);
fdb = br_fdb_find(br, addr, vid);
- if (fdb && fdb->added_by_external_learn)
+ if (fdb && test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
fdb_delete(br, fdb, swdev_notify);
else
err = -ENOENT;
@@ -1186,8 +1181,8 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
spin_lock_bh(&br->hash_lock);
fdb = br_fdb_find(br, addr, vid);
- if (fdb)
- fdb->offloaded = offloaded;
+ if (fdb && offloaded != test_bit(BR_FDB_OFFLOADED, &fdb->flags))
+ change_bit(BR_FDB_OFFLOADED, &fdb->flags);
spin_unlock_bh(&br->hash_lock);
}
@@ -1206,7 +1201,7 @@ void br_fdb_clear_offload(const struct net_device *dev, u16 vid)
spin_lock_bh(&p->br->hash_lock);
hlist_for_each_entry(f, &p->br->fdb_list, fdb_node) {
if (f->dst == p && f->key.vlan_id == vid)
- f->offloaded = 0;
+ clear_bit(BR_FDB_OFFLOADED, &f->flags);
}
spin_unlock_bh(&p->br->hash_lock);
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 09b1dd8cd853..8944ceb47fe9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -88,7 +88,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
/* insert into forwarding database after filtering to avoid spoofing */
br = p->br;
if (p->flags & BR_LEARNING)
- br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
+ br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
local_rcv = !!(br->dev->flags & IFF_PROMISC);
if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
@@ -151,7 +151,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (dst) {
unsigned long now = jiffies;
- if (dst->is_local)
+ if (test_bit(BR_FDB_LOCAL, &dst->flags))
return br_pass_frame_up(skb);
if (now != dst->used)
@@ -182,9 +182,10 @@ static void __br_handle_local_finish(struct sk_buff *skb)
/* check if vlan is allowed, to avoid spoofing */
if ((p->flags & BR_LEARNING) &&
+ nbp_state_should_learn(p) &&
!br_opt_get(p->br, BROPT_NO_LL_LEARN) &&
br_should_learn(p, skb, &vid))
- br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
+ br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, 0);
}
/* note: already called with rcu_read_lock */
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index ce2ab14ee605..36b0367ca1e0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -172,6 +172,16 @@ struct net_bridge_vlan_group {
u16 pvid;
};
+/* bridge fdb flags */
+enum {
+ BR_FDB_LOCAL,
+ BR_FDB_STATIC,
+ BR_FDB_STICKY,
+ BR_FDB_ADDED_BY_USER,
+ BR_FDB_ADDED_BY_EXT_LEARN,
+ BR_FDB_OFFLOADED,
+};
+
struct net_bridge_fdb_key {
mac_addr addr;
u16 vlan_id;
@@ -183,12 +193,7 @@ struct net_bridge_fdb_entry {
struct net_bridge_fdb_key key;
struct hlist_node fdb_node;
- unsigned char is_local:1,
- is_static:1,
- is_sticky:1,
- added_by_user:1,
- added_by_external_learn:1,
- offloaded:1;
+ unsigned long flags;
/* write-heavy members should not affect lookups */
unsigned long updated ____cacheline_aligned_in_smp;
@@ -495,6 +500,11 @@ static inline bool br_vlan_should_use(const struct net_bridge_vlan *v)
return true;
}
+static inline bool nbp_state_should_learn(const struct net_bridge_port *p)
+{
+ return p->state == BR_STATE_LEARNING || p->state == BR_STATE_FORWARDING;
+}
+
static inline int br_opt_get(const struct net_bridge *br,
enum net_bridge_opts opt)
{
@@ -566,7 +576,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count,
int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid);
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid, bool added_by_user);
+ const unsigned char *addr, u16 vid, unsigned long flags);
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr, u16 vid);
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 921310d3cbae..015209bf44aa 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -129,15 +129,19 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
fdb->key.vlan_id,
fdb->dst->dev,
- fdb->added_by_user,
- fdb->offloaded);
+ test_bit(BR_FDB_ADDED_BY_USER,
+ &fdb->flags),
+ test_bit(BR_FDB_OFFLOADED,
+ &fdb->flags));
break;
case RTM_NEWNEIGH:
br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
fdb->key.vlan_id,
fdb->dst->dev,
- fdb->added_by_user,
- fdb->offloaded);
+ test_bit(BR_FDB_ADDED_BY_USER,
+ &fdb->flags),
+ test_bit(BR_FDB_OFFLOADED,
+ &fdb->flags));
break;
}
}
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
index eb83051c8330..b7532a79ca7a 100644
--- a/net/caif/Kconfig
+++ b/net/caif/Kconfig
@@ -13,11 +13,11 @@ menuconfig CAIF
with its modems. It is accessed from user space as sockets (PF_CAIF).
Say Y (or M) here if you build for a phone product (e.g. Android or
- MeeGo ) that uses CAIF as transport, if unsure say N.
+ MeeGo) that uses CAIF as transport. If unsure say N.
If you select to build it as module then CAIF_NETDEV also needs to be
- built as modules. You will also need to say yes to any CAIF physical
- devices that your platform requires.
+ built as a module. You will also need to say Y (or M) to any CAIF
+ physical devices that your platform requires.
See Documentation/networking/caif for a further explanation on how to
use and configure CAIF.
@@ -37,7 +37,7 @@ config CAIF_NETDEV
default CAIF
---help---
Say Y if you will be using a CAIF based GPRS network device.
- This can be either built-in or a loadable module,
+ This can be either built-in or a loadable module.
If you select to build it as a built-in then the main CAIF device must
also be a built-in.
If unsure say Y.
@@ -48,7 +48,7 @@ config CAIF_USB
default n
---help---
Say Y if you are using CAIF over USB CDC NCM.
- This can be either built-in or a loadable module,
+ This can be either built-in or a loadable module.
If you select to build it as a built-in then the main CAIF device must
also be a built-in.
If unsure say N.
diff --git a/net/core/dev.c b/net/core/dev.c
index 99ac84ff398f..1c799d486623 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -229,6 +229,122 @@ static inline void rps_unlock(struct softnet_data *sd)
#endif
}
+static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
+ const char *name)
+{
+ struct netdev_name_node *name_node;
+
+ name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
+ if (!name_node)
+ return NULL;
+ INIT_HLIST_NODE(&name_node->hlist);
+ name_node->dev = dev;
+ name_node->name = name;
+ return name_node;
+}
+
+static struct netdev_name_node *
+netdev_name_node_head_alloc(struct net_device *dev)
+{
+ struct netdev_name_node *name_node;
+
+ name_node = netdev_name_node_alloc(dev, dev->name);
+ if (!name_node)
+ return NULL;
+ INIT_LIST_HEAD(&name_node->list);
+ return name_node;
+}
+
+static void netdev_name_node_free(struct netdev_name_node *name_node)
+{
+ kfree(name_node);
+}
+
+static void netdev_name_node_add(struct net *net,
+ struct netdev_name_node *name_node)
+{
+ hlist_add_head_rcu(&name_node->hlist,
+ dev_name_hash(net, name_node->name));
+}
+
+static void netdev_name_node_del(struct netdev_name_node *name_node)
+{
+ hlist_del_rcu(&name_node->hlist);
+}
+
+static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
+ const char *name)
+{
+ struct hlist_head *head = dev_name_hash(net, name);
+ struct netdev_name_node *name_node;
+
+ hlist_for_each_entry(name_node, head, hlist)
+ if (!strcmp(name_node->name, name))
+ return name_node;
+ return NULL;
+}
+
+static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
+ const char *name)
+{
+ struct hlist_head *head = dev_name_hash(net, name);
+ struct netdev_name_node *name_node;
+
+ hlist_for_each_entry_rcu(name_node, head, hlist)
+ if (!strcmp(name_node->name, name))
+ return name_node;
+ return NULL;
+}
+
+int netdev_name_node_alt_create(struct net_device *dev, const char *name)
+{
+ struct netdev_name_node *name_node;
+ struct net *net = dev_net(dev);
+
+ name_node = netdev_name_node_lookup(net, name);
+ if (name_node)
+ return -EEXIST;
+ name_node = netdev_name_node_alloc(dev, name);
+ if (!name_node)
+ return -ENOMEM;
+ netdev_name_node_add(net, name_node);
+ /* The node that holds dev->name acts as a head of per-device list. */
+ list_add_tail(&name_node->list, &dev->name_node->list);
+
+ return 0;
+}
+EXPORT_SYMBOL(netdev_name_node_alt_create);
+
+static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+{
+ list_del(&name_node->list);
+ netdev_name_node_del(name_node);
+ kfree(name_node->name);
+ netdev_name_node_free(name_node);
+}
+
+int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
+{
+ struct netdev_name_node *name_node;
+ struct net *net = dev_net(dev);
+
+ name_node = netdev_name_node_lookup(net, name);
+ if (!name_node)
+ return -ENOENT;
+ __netdev_name_node_alt_destroy(name_node);
+
+ return 0;
+}
+EXPORT_SYMBOL(netdev_name_node_alt_destroy);
+
+static void netdev_name_node_alt_flush(struct net_device *dev)
+{
+ struct netdev_name_node *name_node, *tmp;
+
+ list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
+ __netdev_name_node_alt_destroy(name_node);
+}
+
/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
@@ -238,7 +354,7 @@ static void list_netdevice(struct net_device *dev)
write_lock_bh(&dev_base_lock);
list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
- hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+ netdev_name_node_add(net, dev->name_node);
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock_bh(&dev_base_lock);
@@ -256,7 +372,7 @@ static void unlist_netdevice(struct net_device *dev)
/* Unlink dev from the device chain */
write_lock_bh(&dev_base_lock);
list_del_rcu(&dev->dev_list);
- hlist_del_rcu(&dev->name_hlist);
+ netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
write_unlock_bh(&dev_base_lock);
@@ -652,14 +768,10 @@ EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
- struct net_device *dev;
- struct hlist_head *head = dev_name_hash(net, name);
+ struct netdev_name_node *node_name;
- hlist_for_each_entry(dev, head, name_hlist)
- if (!strncmp(dev->name, name, IFNAMSIZ))
- return dev;
-
- return NULL;
+ node_name = netdev_name_node_lookup(net, name);
+ return node_name ? node_name->dev : NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);
@@ -677,14 +789,10 @@ EXPORT_SYMBOL(__dev_get_by_name);
struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
- struct net_device *dev;
- struct hlist_head *head = dev_name_hash(net, name);
-
- hlist_for_each_entry_rcu(dev, head, name_hlist)
- if (!strncmp(dev->name, name, IFNAMSIZ))
- return dev;
+ struct netdev_name_node *node_name;
- return NULL;
+ node_name = netdev_name_node_lookup_rcu(net, name);
+ return node_name ? node_name->dev : NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);
@@ -1060,8 +1168,8 @@ int dev_alloc_name(struct net_device *dev, const char *name)
}
EXPORT_SYMBOL(dev_alloc_name);
-int dev_get_valid_name(struct net *net, struct net_device *dev,
- const char *name)
+static int dev_get_valid_name(struct net *net, struct net_device *dev,
+ const char *name)
{
BUG_ON(!net);
@@ -1077,7 +1185,6 @@ int dev_get_valid_name(struct net *net, struct net_device *dev,
return 0;
}
-EXPORT_SYMBOL(dev_get_valid_name);
/**
* dev_change_name - change name of a device
@@ -1151,13 +1258,13 @@ rollback:
netdev_adjacent_rename_links(dev, oldname);
write_lock_bh(&dev_base_lock);
- hlist_del_rcu(&dev->name_hlist);
+ netdev_name_node_del(dev->name_node);
write_unlock_bh(&dev_base_lock);
synchronize_rcu();
write_lock_bh(&dev_base_lock);
- hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
+ netdev_name_node_add(net, dev->name_node);
write_unlock_bh(&dev_base_lock);
ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
@@ -1536,6 +1643,62 @@ static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
return nb->notifier_call(nb, val, &info);
}
+static int call_netdevice_register_notifiers(struct notifier_block *nb,
+ struct net_device *dev)
+{
+ int err;
+
+ err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
+ err = notifier_to_errno(err);
+ if (err)
+ return err;
+
+ if (!(dev->flags & IFF_UP))
+ return 0;
+
+ call_netdevice_notifier(nb, NETDEV_UP, dev);
+ return 0;
+}
+
+static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
+ struct net_device *dev)
+{
+ if (dev->flags & IFF_UP) {
+ call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+ dev);
+ call_netdevice_notifier(nb, NETDEV_DOWN, dev);
+ }
+ call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
+}
+
+static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
+ struct net *net)
+{
+ struct net_device *dev;
+ int err;
+
+ for_each_netdev(net, dev) {
+ err = call_netdevice_register_notifiers(nb, dev);
+ if (err)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ for_each_netdev_continue_reverse(net, dev)
+ call_netdevice_unregister_notifiers(nb, dev);
+ return err;
+}
+
+static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
+ struct net *net)
+{
+ struct net_device *dev;
+
+ for_each_netdev(net, dev)
+ call_netdevice_unregister_notifiers(nb, dev);
+}
+
static int dev_boot_phase = 1;
/**
@@ -1554,8 +1717,6 @@ static int dev_boot_phase = 1;
int register_netdevice_notifier(struct notifier_block *nb)
{
- struct net_device *dev;
- struct net_device *last;
struct net *net;
int err;
@@ -1568,17 +1729,9 @@ int register_netdevice_notifier(struct notifier_block *nb)
if (dev_boot_phase)
goto unlock;
for_each_net(net) {
- for_each_netdev(net, dev) {
- err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
- err = notifier_to_errno(err);
- if (err)
- goto rollback;
-
- if (!(dev->flags & IFF_UP))
- continue;
-
- call_netdevice_notifier(nb, NETDEV_UP, dev);
- }
+ err = call_netdevice_register_net_notifiers(nb, net);
+ if (err)
+ goto rollback;
}
unlock:
@@ -1587,22 +1740,9 @@ unlock:
return err;
rollback:
- last = dev;
- for_each_net(net) {
- for_each_netdev(net, dev) {
- if (dev == last)
- goto outroll;
+ for_each_net_continue_reverse(net)
+ call_netdevice_unregister_net_notifiers(nb, net);
- if (dev->flags & IFF_UP) {
- call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
- dev);
- call_netdevice_notifier(nb, NETDEV_DOWN, dev);
- }
- call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
- }
- }
-
-outroll:
raw_notifier_chain_unregister(&netdev_chain, nb);
goto unlock;
}
@@ -1653,6 +1793,80 @@ unlock:
EXPORT_SYMBOL(unregister_netdevice_notifier);
/**
+ * register_netdevice_notifier_net - register a per-netns network notifier block
+ * @net: network namespace
+ * @nb: notifier
+ *
+ * Register a notifier to be called when network device events occur.
+ * The notifier passed is linked into the kernel structures and must
+ * not be reused until it has been unregistered. A negative errno code
+ * is returned on a failure.
+ *
+ * When registered all registration and up events are replayed
+ * to the new notifier to allow device to have a race free
+ * view of the network device list.
+ */
+
+int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ rtnl_lock();
+ err = raw_notifier_chain_register(&net->netdev_chain, nb);
+ if (err)
+ goto unlock;
+ if (dev_boot_phase)
+ goto unlock;
+
+ err = call_netdevice_register_net_notifiers(nb, net);
+ if (err)
+ goto chain_unregister;
+
+unlock:
+ rtnl_unlock();
+ return err;
+
+chain_unregister:
+ raw_notifier_chain_unregister(&netdev_chain, nb);
+ goto unlock;
+}
+EXPORT_SYMBOL(register_netdevice_notifier_net);
+
+/**
+ * unregister_netdevice_notifier_net - unregister a per-netns
+ * network notifier block
+ * @net: network namespace
+ * @nb: notifier
+ *
+ * Unregister a notifier previously registered by
+ * register_netdevice_notifier(). The notifier is unlinked into the
+ * kernel structures and may then be reused. A negative errno code
+ * is returned on a failure.
+ *
+ * After unregistering unregister and down device events are synthesized
+ * for all devices on the device list to the removed notifier to remove
+ * the need for special case cleanup code.
+ */
+
+int unregister_netdevice_notifier_net(struct net *net,
+ struct notifier_block *nb)
+{
+ int err;
+
+ rtnl_lock();
+ err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
+ if (err)
+ goto unlock;
+
+ call_netdevice_unregister_net_notifiers(nb, net);
+
+unlock:
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL(unregister_netdevice_notifier_net);
+
+/**
* call_netdevice_notifiers_info - call all network notifier blocks
* @val: value passed unmodified to notifier function
* @info: notifier information data
@@ -1664,7 +1878,18 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
static int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info)
{
+ struct net *net = dev_net(info->dev);
+ int ret;
+
ASSERT_RTNL();
+
+ /* Run per-netns notifier block chain first, then run the global one.
+ * Hopefully, one day, the global one is going to be removed after
+ * all notifier block registrators get converted to be per-netns.
+ */
+ ret = raw_notifier_call_chain(&net->netdev_chain, val, info);
+ if (ret & NOTIFY_STOP_MASK)
+ return ret;
return raw_notifier_call_chain(&netdev_chain, val, info);
}
@@ -2690,7 +2915,7 @@ static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
void netif_schedule_queue(struct netdev_queue *txq)
{
rcu_read_lock();
- if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
+ if (!netif_xmit_stopped(txq)) {
struct Qdisc *q = rcu_dereference(txq->qdisc);
__netif_schedule(q);
@@ -2858,12 +3083,9 @@ int skb_checksum_help(struct sk_buff *skb)
offset += skb->csum_offset;
BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + sizeof(__sum16))) {
- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (ret)
- goto out;
- }
+ ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
+ if (ret)
+ goto out;
*(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
out_set_summed:
@@ -2898,12 +3120,11 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
ret = -EINVAL;
goto out;
}
- if (skb_cloned(skb) &&
- !skb_clone_writable(skb, offset + sizeof(__le32))) {
- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- if (ret)
- goto out;
- }
+
+ ret = skb_ensure_writable(skb, offset + sizeof(__le32));
+ if (ret)
+ goto out;
+
crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
skb->len - start, ~(__u32)0,
crc32c_csum_stub));
@@ -3386,7 +3607,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
- if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
+ if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) &&
qdisc_run_begin(q)) {
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
&q->state))) {
@@ -5582,6 +5803,26 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+ if (!napi->rx_count)
+ return;
+ netif_receive_skb_list_internal(&napi->rx_list);
+ INIT_LIST_HEAD(&napi->rx_list);
+ napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
+ * pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+ list_add_tail(&skb->list, &napi->rx_list);
+ if (++napi->rx_count >= gro_normal_batch)
+ gro_normal_list(napi);
+}
+
static void napi_skb_free_stolen_head(struct sk_buff *skb)
{
skb_dst_drop(skb);
@@ -5589,12 +5830,13 @@ static void napi_skb_free_stolen_head(struct sk_buff *skb)
kmem_cache_free(skbuff_head_cache, skb);
}
-static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
+static gro_result_t napi_skb_finish(struct napi_struct *napi,
+ struct sk_buff *skb,
+ gro_result_t ret)
{
switch (ret) {
case GRO_NORMAL:
- if (netif_receive_skb_internal(skb))
- ret = GRO_DROP;
+ gro_normal_one(napi, skb);
break;
case GRO_DROP:
@@ -5626,7 +5868,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
skb_gro_reset_offset(skb);
- ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
+ ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
trace_napi_gro_receive_exit(ret);
return ret;
@@ -5672,26 +5914,6 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
}
EXPORT_SYMBOL(napi_get_frags);
-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static void gro_normal_list(struct napi_struct *napi)
-{
- if (!napi->rx_count)
- return;
- netif_receive_skb_list_internal(&napi->rx_list);
- INIT_LIST_HEAD(&napi->rx_list);
- napi->rx_count = 0;
-}
-
-/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
- * pass the whole batch up to the stack.
- */
-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
-{
- list_add_tail(&skb->list, &napi->rx_list);
- if (++napi->rx_count >= gro_normal_batch)
- gro_normal_list(napi);
-}
-
static gro_result_t napi_frags_finish(struct napi_struct *napi,
struct sk_buff *skb,
gro_result_t ret)
@@ -8532,6 +8754,9 @@ static void rollback_registered_many(struct list_head *head)
dev_uc_flush(dev);
dev_mc_flush(dev);
+ netdev_name_node_alt_flush(dev);
+ netdev_name_node_free(dev->name_node);
+
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
@@ -9011,6 +9236,11 @@ int register_netdevice(struct net_device *dev)
if (ret < 0)
goto out;
+ ret = -ENOMEM;
+ dev->name_node = netdev_name_node_head_alloc(dev);
+ if (!dev->name_node)
+ goto out;
+
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@ -9132,6 +9362,8 @@ out:
return ret;
err_uninit:
+ if (dev->name_node)
+ netdev_name_node_free(dev->name_node);
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
if (dev->priv_destructor)
@@ -9946,6 +10178,8 @@ static int __net_init netdev_init(struct net *net)
if (net->dev_index_head == NULL)
goto err_idx;
+ RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
+
return 0;
err_idx:
diff --git a/net/core/devlink.c b/net/core/devlink.c
index f80151eeaf51..2e027c9436e0 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -95,16 +95,25 @@ static LIST_HEAD(devlink_list);
*/
static DEFINE_MUTEX(devlink_mutex);
-static struct net *devlink_net(const struct devlink *devlink)
+struct net *devlink_net(const struct devlink *devlink)
{
return read_pnet(&devlink->_net);
}
+EXPORT_SYMBOL_GPL(devlink_net);
-static void devlink_net_set(struct devlink *devlink, struct net *net)
+static void __devlink_net_set(struct devlink *devlink, struct net *net)
{
write_pnet(&devlink->_net, net);
}
+void devlink_net_set(struct devlink *devlink, struct net *net)
+{
+ if (WARN_ON(devlink->registered))
+ return;
+ __devlink_net_set(devlink, net);
+}
+EXPORT_SYMBOL_GPL(devlink_net_set);
+
static struct devlink *devlink_get_from_attrs(struct net *net,
struct nlattr **attrs)
{
@@ -434,8 +443,16 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
{
struct devlink *devlink;
- devlink = devlink_get_from_info(info);
- if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
+ /* When devlink changes netns, it would not be found
+ * by devlink_get_from_info(). So try if it is stored first.
+ */
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) {
+ devlink = info->user_ptr[0];
+ } else {
+ devlink = devlink_get_from_info(info);
+ WARN_ON(IS_ERR(devlink));
+ }
+ if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
mutex_unlock(&devlink_mutex);
}
@@ -1035,7 +1052,7 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
struct devlink_sb *devlink_sb;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -1058,6 +1075,9 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
out:
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -1233,7 +1253,7 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
struct devlink_sb *devlink_sb;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -1256,6 +1276,9 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
out:
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -1460,7 +1483,7 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
struct devlink_sb *devlink_sb;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -1485,6 +1508,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
out:
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -2674,6 +2700,72 @@ devlink_resources_validate(struct devlink *devlink,
return err;
}
+static struct net *devlink_netns_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
+ struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
+ struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
+ struct net *net;
+
+ if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
+ NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (netns_pid_attr) {
+ net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
+ } else if (netns_fd_attr) {
+ net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
+ } else if (netns_id_attr) {
+ net = get_net_ns_by_id(sock_net(skb->sk),
+ nla_get_u32(netns_id_attr));
+ if (!net)
+ net = ERR_PTR(-EINVAL);
+ } else {
+ WARN_ON(1);
+ net = ERR_PTR(-EINVAL);
+ }
+ if (IS_ERR(net)) {
+ NL_SET_ERR_MSG(info->extack, "Unknown network namespace");
+ return ERR_PTR(-EINVAL);
+ }
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return ERR_PTR(-EPERM);
+ }
+ return net;
+}
+
+static void devlink_param_notify(struct devlink *devlink,
+ unsigned int port_index,
+ struct devlink_param_item *param_item,
+ enum devlink_command cmd);
+
+static void devlink_reload_netns_change(struct devlink *devlink,
+ struct net *dest_net)
+{
+ struct devlink_param_item *param_item;
+
+ /* Userspace needs to be notified about devlink objects
+ * removed from original and entering new network namespace.
+ * The rest of the devlink objects are re-created during
+ * reload process so the notifications are generated separatelly.
+ */
+
+ list_for_each_entry(param_item, &devlink->param_list, list)
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_DEL);
+ devlink_notify(devlink, DEVLINK_CMD_DEL);
+
+ __devlink_net_set(devlink, dest_net);
+
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+ list_for_each_entry(param_item, &devlink->param_list, list)
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_NEW);
+}
+
static bool devlink_reload_supported(struct devlink *devlink)
{
return devlink->ops->reload_down && devlink->ops->reload_up;
@@ -2694,9 +2786,30 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
+static int devlink_reload(struct devlink *devlink, struct net *dest_net,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (!devlink->reload_enabled)
+ return -EOPNOTSUPP;
+
+ err = devlink->ops->reload_down(devlink, !!dest_net, extack);
+ if (err)
+ return err;
+
+ if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
+ devlink_reload_netns_change(devlink, dest_net);
+
+ err = devlink->ops->reload_up(devlink, extack);
+ devlink_reload_failed_set(devlink, !!err);
+ return err;
+}
+
static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
+ struct net *dest_net = NULL;
int err;
if (!devlink_reload_supported(devlink))
@@ -2707,11 +2820,20 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
return err;
}
- err = devlink->ops->reload_down(devlink, info->extack);
- if (err)
- return err;
- err = devlink->ops->reload_up(devlink, info->extack);
- devlink_reload_failed_set(devlink, !!err);
+
+ if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
+ info->attrs[DEVLINK_ATTR_NETNS_FD] ||
+ info->attrs[DEVLINK_ATTR_NETNS_ID]) {
+ dest_net = devlink_netns_get(skb, info);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
+ }
+
+ err = devlink_reload(devlink, dest_net, info->extack);
+
+ if (dest_net)
+ put_net(dest_net);
+
return err;
}
@@ -3155,7 +3277,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -3183,6 +3305,9 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
out:
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -3411,7 +3536,7 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -3444,6 +3569,9 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
out:
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -3818,29 +3946,19 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
u64 ret_offset, start_offset, end_offset = 0;
+ struct nlattr **attrs = info->attrs;
struct devlink_region *region;
struct nlattr *chunks_attr;
const char *region_name;
struct devlink *devlink;
- struct nlattr **attrs;
bool dump = true;
void *hdr;
int err;
start_offset = *((u64 *)&cb->args[0]);
- attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
- if (!attrs)
- return -ENOMEM;
-
- err = nlmsg_parse_deprecated(cb->nlh,
- GENL_HDRLEN + devlink_nl_family.hdrsize,
- attrs, DEVLINK_ATTR_MAX,
- devlink_nl_family.policy, cb->extack);
- if (err)
- goto out_free;
-
mutex_lock(&devlink_mutex);
devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
if (IS_ERR(devlink)) {
@@ -3917,7 +4035,6 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
genlmsg_end(skb, hdr);
mutex_unlock(&devlink->lock);
mutex_unlock(&devlink_mutex);
- kfree(attrs);
return skb->len;
@@ -3927,8 +4044,6 @@ out_unlock:
mutex_unlock(&devlink->lock);
out_dev:
mutex_unlock(&devlink_mutex);
-out_free:
- kfree(attrs);
return err;
}
@@ -4066,7 +4181,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
int start = cb->args[0];
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
@@ -4094,6 +4209,9 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
}
mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
cb->args[0] = idx;
return msg->len;
}
@@ -4732,14 +4850,17 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
static int
devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
- void *priv_ctx)
+ void *priv_ctx, struct netlink_ext_ack *extack)
{
int err;
+ if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY)
+ return 0;
+
if (!reporter->ops->recover)
return -EOPNOTSUPP;
- err = reporter->ops->recover(reporter, priv_ctx);
+ err = reporter->ops->recover(reporter, priv_ctx, extack);
if (err)
return err;
@@ -4760,7 +4881,8 @@ devlink_health_dump_clear(struct devlink_health_reporter *reporter)
}
static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
- void *priv_ctx)
+ void *priv_ctx,
+ struct netlink_ext_ack *extack)
{
int err;
@@ -4781,7 +4903,7 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
goto dump_err;
err = reporter->ops->dump(reporter, reporter->dump_fmsg,
- priv_ctx);
+ priv_ctx, extack);
if (err)
goto dump_err;
@@ -4828,11 +4950,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
mutex_lock(&reporter->dump_lock);
/* store current dump of current error, for later analysis */
- devlink_health_do_dump(reporter, priv_ctx);
+ devlink_health_do_dump(reporter, priv_ctx, NULL);
mutex_unlock(&reporter->dump_lock);
if (reporter->auto_recover)
- return devlink_health_reporter_recover(reporter, priv_ctx);
+ return devlink_health_reporter_recover(reporter,
+ priv_ctx, NULL);
return 0;
}
@@ -4867,21 +4990,10 @@ devlink_health_reporter_get_from_info(struct devlink *devlink,
static struct devlink_health_reporter *
devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct devlink_health_reporter *reporter;
+ struct nlattr **attrs = info->attrs;
struct devlink *devlink;
- struct nlattr **attrs;
- int err;
-
- attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL);
- if (!attrs)
- return NULL;
-
- err = nlmsg_parse_deprecated(cb->nlh,
- GENL_HDRLEN + devlink_nl_family.hdrsize,
- attrs, DEVLINK_ATTR_MAX,
- devlink_nl_family.policy, cb->extack);
- if (err)
- goto free;
mutex_lock(&devlink_mutex);
devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
@@ -4890,12 +5002,9 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
mutex_unlock(&devlink_mutex);
- kfree(attrs);
return reporter;
unlock:
mutex_unlock(&devlink_mutex);
-free:
- kfree(attrs);
return NULL;
}
@@ -5084,7 +5193,7 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
if (!reporter)
return -EINVAL;
- err = devlink_health_reporter_recover(reporter, NULL);
+ err = devlink_health_reporter_recover(reporter, NULL, info->extack);
devlink_health_reporter_put(reporter);
return err;
@@ -5117,7 +5226,7 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
if (err)
goto out;
- err = reporter->ops->diagnose(reporter, fmsg);
+ err = reporter->ops->diagnose(reporter, fmsg, info->extack);
if (err)
goto out;
@@ -5152,7 +5261,7 @@ devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
}
mutex_lock(&reporter->dump_lock);
if (!start) {
- err = devlink_health_do_dump(reporter, NULL);
+ err = devlink_health_do_dump(reporter, NULL, cb->extack);
if (err)
goto unlock;
cb->args[1] = reporter->dump_ts;
@@ -5793,6 +5902,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
[DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -6023,7 +6135,8 @@ static const struct genl_ops devlink_nl_ops[] = {
},
{
.cmd = DEVLINK_CMD_REGION_READ,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = devlink_nl_cmd_region_read_dumpit,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
@@ -6071,7 +6184,8 @@ static const struct genl_ops devlink_nl_ops[] = {
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
@@ -6155,7 +6269,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
if (!devlink)
return NULL;
devlink->ops = ops;
- devlink_net_set(devlink, &init_net);
+ __devlink_net_set(devlink, &init_net);
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
@@ -6181,6 +6295,7 @@ int devlink_register(struct devlink *devlink, struct device *dev)
{
mutex_lock(&devlink_mutex);
devlink->dev = dev;
+ devlink->registered = true;
list_add_tail(&devlink->list, &devlink_list);
devlink_notify(devlink, DEVLINK_CMD_NEW);
mutex_unlock(&devlink_mutex);
@@ -6196,6 +6311,8 @@ EXPORT_SYMBOL_GPL(devlink_register);
void devlink_unregister(struct devlink *devlink)
{
mutex_lock(&devlink_mutex);
+ WARN_ON(devlink_reload_supported(devlink) &&
+ devlink->reload_enabled);
devlink_notify(devlink, DEVLINK_CMD_DEL);
list_del(&devlink->list);
mutex_unlock(&devlink_mutex);
@@ -6203,6 +6320,41 @@ void devlink_unregister(struct devlink *devlink)
EXPORT_SYMBOL_GPL(devlink_unregister);
/**
+ * devlink_reload_enable - Enable reload of devlink instance
+ *
+ * @devlink: devlink
+ *
+ * Should be called at end of device initialization
+ * process when reload operation is supported.
+ */
+void devlink_reload_enable(struct devlink *devlink)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->reload_enabled = true;
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_reload_enable);
+
+/**
+ * devlink_reload_disable - Disable reload of devlink instance
+ *
+ * @devlink: devlink
+ *
+ * Should be called at the beginning of device cleanup
+ * process when reload operation is supported.
+ */
+void devlink_reload_disable(struct devlink *devlink)
+{
+ mutex_lock(&devlink_mutex);
+ /* Mutex is taken which ensures that no reload operation is in
+ * progress while setting up forbidded flag.
+ */
+ devlink->reload_enabled = false;
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_reload_disable);
+
+/**
* devlink_free - Free devlink instance resources
*
* @devlink: devlink
@@ -7490,6 +7642,21 @@ static const struct devlink_trap devlink_trap_generic[] = {
DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
DEVLINK_TRAP(TAIL_DROP, DROP),
+ DEVLINK_TRAP(NON_IP_PACKET, DROP),
+ DEVLINK_TRAP(UC_DIP_MC_DMAC, DROP),
+ DEVLINK_TRAP(DIP_LB, DROP),
+ DEVLINK_TRAP(SIP_MC, DROP),
+ DEVLINK_TRAP(SIP_LB, DROP),
+ DEVLINK_TRAP(CORRUPTED_IP_HDR, DROP),
+ DEVLINK_TRAP(IPV4_SIP_BC, DROP),
+ DEVLINK_TRAP(IPV6_MC_DIP_RESERVED_SCOPE, DROP),
+ DEVLINK_TRAP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, DROP),
+ DEVLINK_TRAP(MTU_ERROR, EXCEPTION),
+ DEVLINK_TRAP(UNRESOLVED_NEIGH, EXCEPTION),
+ DEVLINK_TRAP(RPF, EXCEPTION),
+ DEVLINK_TRAP(REJECT_ROUTE, EXCEPTION),
+ DEVLINK_TRAP(IPV4_LPM_UNICAST_MISS, EXCEPTION),
+ DEVLINK_TRAP(IPV6_LPM_UNICAST_MISS, EXCEPTION),
};
#define DEVLINK_TRAP_GROUP(_id) \
@@ -8060,9 +8227,43 @@ int devlink_compat_switch_id_get(struct net_device *dev,
return 0;
}
+static void __net_exit devlink_pernet_pre_exit(struct net *net)
+{
+ struct devlink *devlink;
+ int err;
+
+ /* In case network namespace is getting destroyed, reload
+ * all devlink instances from this namespace into init_net.
+ */
+ mutex_lock(&devlink_mutex);
+ list_for_each_entry(devlink, &devlink_list, list) {
+ if (net_eq(devlink_net(devlink), net)) {
+ if (WARN_ON(!devlink_reload_supported(devlink)))
+ continue;
+ err = devlink_reload(devlink, &init_net, NULL);
+ if (err && err != -EOPNOTSUPP)
+ pr_warn("Failed to reload devlink instance into init_net\n");
+ }
+ }
+ mutex_unlock(&devlink_mutex);
+}
+
+static struct pernet_operations devlink_pernet_ops __net_initdata = {
+ .pre_exit = devlink_pernet_pre_exit,
+};
+
static int __init devlink_init(void)
{
- return genl_register_family(&devlink_nl_family);
+ int err;
+
+ err = genl_register_family(&devlink_nl_family);
+ if (err)
+ goto out;
+ err = register_pernet_subsys(&devlink_pernet_ops);
+
+out:
+ WARN_ON(err);
+ return err;
}
subsys_initcall(devlink_init);
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 470a606d5e8d..fc96259807b6 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -12,17 +12,15 @@ static unsigned int fib_notifier_net_id;
struct fib_notifier_net {
struct list_head fib_notifier_ops;
+ struct atomic_notifier_head fib_chain;
};
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
+int call_fib_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
int err;
- info->net = net;
err = nb->notifier_call(nb, event_type, info);
return notifier_to_errno(err);
}
@@ -31,106 +29,100 @@ EXPORT_SYMBOL(call_fib_notifier);
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info)
{
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
int err;
- info->net = net;
- err = atomic_notifier_call_chain(&fib_chain, event_type, info);
+ err = atomic_notifier_call_chain(&fn_net->fib_chain, event_type, info);
return notifier_to_errno(err);
}
EXPORT_SYMBOL(call_fib_notifiers);
-static unsigned int fib_seq_sum(void)
+static unsigned int fib_seq_sum(struct net *net)
{
- struct fib_notifier_net *fn_net;
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
struct fib_notifier_ops *ops;
unsigned int fib_seq = 0;
- struct net *net;
rtnl_lock();
- down_read(&net_rwsem);
- for_each_net(net) {
- fn_net = net_generic(net, fib_notifier_net_id);
- rcu_read_lock();
- list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
- if (!try_module_get(ops->owner))
- continue;
- fib_seq += ops->fib_seq_read(net);
- module_put(ops->owner);
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
+ if (!try_module_get(ops->owner))
+ continue;
+ fib_seq += ops->fib_seq_read(net);
+ module_put(ops->owner);
}
- up_read(&net_rwsem);
+ rcu_read_unlock();
rtnl_unlock();
return fib_seq;
}
-static int fib_net_dump(struct net *net, struct notifier_block *nb)
+static int fib_net_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
struct fib_notifier_ops *ops;
+ int err = 0;
+ rcu_read_lock();
list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
- int err;
-
if (!try_module_get(ops->owner))
continue;
- err = ops->fib_dump(net, nb);
+ err = ops->fib_dump(net, nb, extack);
module_put(ops->owner);
if (err)
- return err;
+ goto unlock;
}
- return 0;
+unlock:
+ rcu_read_unlock();
+
+ return err;
}
-static bool fib_dump_is_consistent(struct notifier_block *nb,
+static bool fib_dump_is_consistent(struct net *net, struct notifier_block *nb,
void (*cb)(struct notifier_block *nb),
unsigned int fib_seq)
{
- atomic_notifier_chain_register(&fib_chain, nb);
- if (fib_seq == fib_seq_sum())
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+ atomic_notifier_chain_register(&fn_net->fib_chain, nb);
+ if (fib_seq == fib_seq_sum(net))
return true;
- atomic_notifier_chain_unregister(&fib_chain, nb);
+ atomic_notifier_chain_unregister(&fn_net->fib_chain, nb);
if (cb)
cb(nb);
return false;
}
#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb))
+int register_fib_notifier(struct net *net, struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb),
+ struct netlink_ext_ack *extack)
{
int retries = 0;
int err;
do {
- unsigned int fib_seq = fib_seq_sum();
- struct net *net;
-
- rcu_read_lock();
- for_each_net_rcu(net) {
- err = fib_net_dump(net, nb);
- if (err)
- goto err_fib_net_dump;
- }
- rcu_read_unlock();
-
- if (fib_dump_is_consistent(nb, cb, fib_seq))
+ unsigned int fib_seq = fib_seq_sum(net);
+
+ err = fib_net_dump(net, nb, extack);
+ if (err)
+ return err;
+
+ if (fib_dump_is_consistent(net, nb, cb, fib_seq))
return 0;
} while (++retries < FIB_DUMP_MAX_RETRIES);
return -EBUSY;
-
-err_fib_net_dump:
- rcu_read_unlock();
- return err;
}
EXPORT_SYMBOL(register_fib_notifier);
-int unregister_fib_notifier(struct notifier_block *nb)
+int unregister_fib_notifier(struct net *net, struct notifier_block *nb)
{
- return atomic_notifier_chain_unregister(&fib_chain, nb);
+ struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+ return atomic_notifier_chain_unregister(&fn_net->fib_chain, nb);
}
EXPORT_SYMBOL(unregister_fib_notifier);
@@ -181,6 +173,7 @@ static int __net_init fib_notifier_net_init(struct net *net)
struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
INIT_LIST_HEAD(&fn_net->fib_notifier_ops);
+ ATOMIC_INIT_NOTIFIER_HEAD(&fn_net->fib_chain);
return 0;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index dd220ce7ca7a..3e7e15278c46 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -321,16 +321,18 @@ out:
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);
-static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+static int call_fib_rule_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
- struct fib_rule *rule, int family)
+ struct fib_rule *rule, int family,
+ struct netlink_ext_ack *extack)
{
struct fib_rule_notifier_info info = {
.info.family = family,
+ .info.extack = extack,
.rule = rule,
};
- return call_fib_notifier(nb, net, event_type, &info.info);
+ return call_fib_notifier(nb, event_type, &info.info);
}
static int call_fib_rule_notifiers(struct net *net,
@@ -350,20 +352,25 @@ static int call_fib_rule_notifiers(struct net *net,
}
/* Called with rcu_read_lock() */
-int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
+ struct netlink_ext_ack *extack)
{
struct fib_rules_ops *ops;
struct fib_rule *rule;
+ int err = 0;
ops = lookup_rules_ops(net, family);
if (!ops)
return -EAFNOSUPPORT;
- list_for_each_entry_rcu(rule, &ops->rules_list, list)
- call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
- family);
+ list_for_each_entry_rcu(rule, &ops->rules_list, list) {
+ err = call_fib_rule_notifier(nb, FIB_EVENT_RULE_ADD,
+ rule, family, extack);
+ if (err)
+ break;
+ }
rules_ops_put(ops);
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(fib_rules_dump);
diff --git a/net/core/filter.c b/net/core/filter.c
index 3fed5755494b..fc303abec8fa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2245,7 +2245,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
* account for the headroom.
*/
bytes_sg_total = start - offset + bytes;
- if (!msg->sg.copy[i] && bytes_sg_total <= len)
+ if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
goto out;
/* At this point we need to linearize multiple scatterlist
@@ -2450,7 +2450,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
/* Place newly allocated data buffer */
sk_mem_charge(msg->sk, len);
msg->sg.size += len;
- msg->sg.copy[new] = false;
+ __clear_bit(new, &msg->sg.copy);
sg_set_page(&msg->sg.data[new], page, len + copy, 0);
if (rsge.length) {
get_page(sg_page(&rsge));
@@ -3798,7 +3798,7 @@ BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
return -EINVAL;
- if (unlikely(skb_size > skb->len))
+ if (unlikely(!skb || skb_size > skb->len))
return -EFAULT;
return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
@@ -3816,6 +3816,19 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
+static u32 bpf_skb_output_btf_ids[5];
+const struct bpf_func_proto bpf_skb_output_proto = {
+ .func = bpf_skb_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ .btf_id = bpf_skb_output_btf_ids,
+};
+
static unsigned short bpf_tunnel_key_af(u64 flags)
{
return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 68eda10d0680..ca871657a4c4 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -114,19 +114,50 @@ int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
{
struct bpf_prog *attached;
struct net *net;
+ int ret = 0;
net = current->nsproxy->net_ns;
mutex_lock(&flow_dissector_mutex);
+
+ if (net == &init_net) {
+ /* BPF flow dissector in the root namespace overrides
+ * any per-net-namespace one. When attaching to root,
+ * make sure we don't have any BPF program attached
+ * to the non-root namespaces.
+ */
+ struct net *ns;
+
+ for_each_net(ns) {
+ if (ns == &init_net)
+ continue;
+ if (rcu_access_pointer(ns->flow_dissector_prog)) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ } else {
+ /* Make sure root flow dissector is not attached
+ * when attaching to the non-root namespace.
+ */
+ if (rcu_access_pointer(init_net.flow_dissector_prog)) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+
attached = rcu_dereference_protected(net->flow_dissector_prog,
lockdep_is_held(&flow_dissector_mutex));
- if (attached) {
- /* Only one BPF program can be attached at a time */
- mutex_unlock(&flow_dissector_mutex);
- return -EEXIST;
+ if (attached == prog) {
+ /* The same program cannot be attached twice */
+ ret = -EINVAL;
+ goto out;
}
rcu_assign_pointer(net->flow_dissector_prog, prog);
+ if (attached)
+ bpf_prog_put(attached);
+out:
mutex_unlock(&flow_dissector_mutex);
- return 0;
+ return ret;
}
int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
@@ -147,27 +178,6 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
mutex_unlock(&flow_dissector_mutex);
return 0;
}
-/**
- * skb_flow_get_be16 - extract be16 entity
- * @skb: sk_buff to extract from
- * @poff: offset to extract at
- * @data: raw buffer pointer to the packet
- * @hlen: packet header length
- *
- * The function will try to retrieve a be32 entity at
- * offset poff
- */
-static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
- void *data, int hlen)
-{
- __be16 *u, _u;
-
- u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
- if (u)
- return *u;
-
- return 0;
-}
/**
* __skb_flow_get_ports - extract the upper layer ports and return them
@@ -203,6 +213,72 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+static bool icmp_has_id(u8 type)
+{
+ switch (type) {
+ case ICMP_ECHO:
+ case ICMP_ECHOREPLY:
+ case ICMP_TIMESTAMP:
+ case ICMP_TIMESTAMPREPLY:
+ case ICMPV6_ECHO_REQUEST:
+ case ICMPV6_ECHO_REPLY:
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * skb_flow_get_icmp_tci - extract ICMP(6) Type, Code and Identifier fields
+ * @skb: sk_buff to extract from
+ * @key_icmp: struct flow_dissector_key_icmp to fill
+ * @data: raw buffer pointer to the packet
+ * @toff: offset to extract at
+ * @hlen: packet header length
+ */
+void skb_flow_get_icmp_tci(const struct sk_buff *skb,
+ struct flow_dissector_key_icmp *key_icmp,
+ void *data, int thoff, int hlen)
+{
+ struct icmphdr *ih, _ih;
+
+ ih = __skb_header_pointer(skb, thoff, sizeof(_ih), data, hlen, &_ih);
+ if (!ih)
+ return;
+
+ key_icmp->type = ih->type;
+ key_icmp->code = ih->code;
+
+ /* As we use 0 to signal that the Id field is not present,
+ * avoid confusion with packets without such field
+ */
+ if (icmp_has_id(ih->type))
+ key_icmp->id = ih->un.echo.id ? : 1;
+ else
+ key_icmp->id = 0;
+}
+EXPORT_SYMBOL(skb_flow_get_icmp_tci);
+
+/* If FLOW_DISSECTOR_KEY_ICMP is set, dissect an ICMP packet
+ * using skb_flow_get_icmp_tci().
+ */
+static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container,
+ void *data, int thoff, int hlen)
+{
+ struct flow_dissector_key_icmp *key_icmp;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ICMP))
+ return;
+
+ key_icmp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ICMP,
+ target_container);
+
+ skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen);
+}
+
void skb_flow_dissect_meta(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container)
@@ -853,7 +929,6 @@ bool __skb_flow_dissect(const struct net *net,
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
- struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
struct bpf_prog *attached = NULL;
@@ -910,7 +985,10 @@ bool __skb_flow_dissect(const struct net *net,
WARN_ON_ONCE(!net);
if (net) {
rcu_read_lock();
- attached = rcu_dereference(net->flow_dissector_prog);
+ attached = rcu_dereference(init_net.flow_dissector_prog);
+
+ if (!attached)
+ attached = rcu_dereference(net->flow_dissector_prog);
if (attached) {
struct bpf_flow_keys flow_keys;
@@ -1295,6 +1373,12 @@ ip_proto_again:
data, nhoff, hlen);
break;
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ __skb_flow_dissect_icmp(skb, flow_dissector, target_container,
+ data, nhoff, hlen);
+ break;
+
default:
break;
}
@@ -1308,14 +1392,6 @@ ip_proto_again:
data, hlen);
}
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ICMP)) {
- key_icmp = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_ICMP,
- target_container);
- key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
- }
-
/* Process result of IP proto processing */
switch (fdret) {
case FLOW_DISSECT_RET_PROTO_AGAIN:
@@ -1365,8 +1441,8 @@ static const void *flow_keys_hash_start(const struct flow_keys *flow)
static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
{
size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
- BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
- sizeof(*flow) - sizeof(flow->addrs));
+
+ BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
switch (flow->control.addr_type) {
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
@@ -1412,6 +1488,9 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow)
}
EXPORT_SYMBOL(flow_get_u32_dst);
+/* Sort the source and destination IP (and the ports if the IP are the same),
+ * to have consistent hash within the two directions
+ */
static inline void __flow_hash_consistentify(struct flow_keys *keys)
{
int addr_diff, i;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index bfe7bdd4c340..80dbf2f4016e 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -48,7 +48,7 @@ struct net_rate_estimator {
u8 intvl_log; /* period : (250ms << intvl_log) */
seqcount_t seq;
- u32 last_packets;
+ u64 last_packets;
u64 last_bytes;
u64 avpps;
@@ -83,7 +83,7 @@ static void est_timer(struct timer_list *t)
brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log);
brate -= (est->avbps >> est->ewma_log);
- rate = (u64)(b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log);
+ rate = (b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log);
rate -= (est->avpps >> est->ewma_log);
write_seqcount_begin(&est->seq);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 36888f5e09eb..1d653fbfcf52 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -123,8 +123,7 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
for_each_possible_cpu(i) {
struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
unsigned int start;
- u64 bytes;
- u32 packets;
+ u64 bytes, packets;
do {
start = u64_stats_fetch_begin_irq(&bcpu->syncp);
@@ -176,12 +175,17 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
if (d->tail) {
struct gnet_stats_basic sb;
+ int res;
memset(&sb, 0, sizeof(sb));
sb.bytes = bstats.bytes;
sb.packets = bstats.packets;
- return gnet_stats_copy(d, type, &sb, sizeof(sb),
- TCA_STATS_PAD);
+ res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD);
+ if (res < 0 || sb.packets == bstats.packets)
+ return res;
+ /* emit 64bit stats only if needed */
+ return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets,
+ sizeof(bstats.packets), TCA_STATS_PAD);
}
return 0;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5480edff0c86..652da6369037 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1197,7 +1197,7 @@ static void neigh_update_hhs(struct neighbour *neigh)
if (update) {
hh = &neigh->hh;
- if (hh->hh_len) {
+ if (READ_ONCE(hh->hh_len)) {
write_seqlock_bh(&hh->hh_lock);
update(hh, neigh->dev, neigh->ha);
write_sequnlock_bh(&hh->hh_lock);
@@ -1476,7 +1476,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
struct net_device *dev = neigh->dev;
unsigned int seq;
- if (dev->header_ops->cache && !neigh->hh.hh_len)
+ if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
neigh_hh_init(neigh);
do {
@@ -2052,8 +2052,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
goto nla_put_failure;
{
unsigned long now = jiffies;
- unsigned int flush_delta = now - tbl->last_flush;
- unsigned int rand_delta = now - tbl->last_rand;
+ long flush_delta = now - tbl->last_flush;
+ long rand_delta = now - tbl->last_rand;
struct neigh_hash_table *nht;
struct ndt_config ndc = {
.ndtc_key_len = tbl->key_len,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 36347933ec3a..6bbd06f7dc7d 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -20,8 +20,8 @@ static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff
struct hlist_head *h;
unsigned int count = 0, offset = get_offset(*pos);
- h = &net->dev_name_head[get_bucket(*pos)];
- hlist_for_each_entry_rcu(dev, h, name_hlist) {
+ h = &net->dev_index_head[get_bucket(*pos)];
+ hlist_for_each_entry_rcu(dev, h, index_hlist) {
if (++count == offset)
return dev;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 48b1e429857c..294bfcf0ce0e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3404,7 +3404,6 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
HARD_TX_LOCK(odev, txq, smp_processor_id());
if (unlikely(netif_xmit_frozen_or_drv_stopped(txq))) {
- ret = NETDEV_TX_BUSY;
pkt_dev->last_ok = 0;
goto unlock;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c81cd80114d9..000eddb1207d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -980,6 +980,19 @@ static size_t rtnl_xdp_size(void)
return xdp_size;
}
+static size_t rtnl_prop_list_size(const struct net_device *dev)
+{
+ struct netdev_name_node *name_node;
+ size_t size;
+
+ if (list_empty(&dev->name_node->list))
+ return 0;
+ size = nla_total_size(0);
+ list_for_each_entry(name_node, &dev->name_node->list, list)
+ size += nla_total_size(ALTIFNAMSIZ);
+ return size;
+}
+
static noinline size_t if_nlmsg_size(const struct net_device *dev,
u32 ext_filter_mask)
{
@@ -1027,6 +1040,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
+ nla_total_size(4) /* IFLA_MIN_MTU */
+ nla_total_size(4) /* IFLA_MAX_MTU */
+ + rtnl_prop_list_size(dev)
+ 0;
}
@@ -1584,6 +1598,42 @@ static int rtnl_fill_link_af(struct sk_buff *skb,
return 0;
}
+static int rtnl_fill_alt_ifnames(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ struct netdev_name_node *name_node;
+ int count = 0;
+
+ list_for_each_entry(name_node, &dev->name_node->list, list) {
+ if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name))
+ return -EMSGSIZE;
+ count++;
+ }
+ return count;
+}
+
+static int rtnl_fill_prop_list(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ struct nlattr *prop_list;
+ int ret;
+
+ prop_list = nla_nest_start(skb, IFLA_PROP_LIST);
+ if (!prop_list)
+ return -EMSGSIZE;
+
+ ret = rtnl_fill_alt_ifnames(skb, dev);
+ if (ret <= 0)
+ goto nest_cancel;
+
+ nla_nest_end(skb, prop_list);
+ return 0;
+
+nest_cancel:
+ nla_nest_cancel(skb, prop_list);
+ return ret;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb,
struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
@@ -1697,6 +1747,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure_rcu;
rcu_read_unlock();
+ if (rtnl_fill_prop_list(skb, dev))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -1750,6 +1803,9 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
[IFLA_MIN_MTU] = { .type = NLA_U32 },
[IFLA_MAX_MTU] = { .type = NLA_U32 },
+ [IFLA_PROP_LIST] = { .type = NLA_NESTED },
+ [IFLA_ALT_IFNAME] = { .type = NLA_STRING,
+ .len = ALTIFNAMSIZ - 1 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2723,6 +2779,26 @@ errout:
return err;
}
+static struct net_device *rtnl_dev_get(struct net *net,
+ struct nlattr *ifname_attr,
+ struct nlattr *altifname_attr,
+ char *ifname)
+{
+ char buffer[ALTIFNAMSIZ];
+
+ if (!ifname) {
+ ifname = buffer;
+ if (ifname_attr)
+ nla_strlcpy(ifname, ifname_attr, IFNAMSIZ);
+ else if (altifname_attr)
+ nla_strlcpy(ifname, altifname_attr, ALTIFNAMSIZ);
+ else
+ return NULL;
+ }
+
+ return __dev_get_by_name(net, ifname);
+}
+
static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -2751,8 +2827,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
- else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(net, ifname);
+ else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname);
else
goto errout;
@@ -2825,7 +2901,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *tgt_net = net;
struct net_device *dev = NULL;
struct ifinfomsg *ifm;
- char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
int err;
int netnsid = -1;
@@ -2839,9 +2914,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
- if (tb[IFLA_IFNAME])
- nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
-
if (tb[IFLA_TARGET_NETNSID]) {
netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]);
tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid);
@@ -2853,8 +2925,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
- else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(tgt_net, ifname);
+ else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ dev = rtnl_dev_get(net, tb[IFLA_IFNAME],
+ tb[IFLA_ALT_IFNAME], NULL);
else if (tb[IFLA_GROUP])
err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
else
@@ -3025,12 +3098,10 @@ replay:
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
- else {
- if (ifname[0])
- dev = __dev_get_by_name(net, ifname);
- else
- dev = NULL;
- }
+ else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname);
+ else
+ dev = NULL;
if (dev) {
master_dev = netdev_master_upper_dev_get(dev);
@@ -3292,6 +3363,7 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb,
switch (i) {
case IFLA_IFNAME:
+ case IFLA_ALT_IFNAME:
case IFLA_EXT_MASK:
case IFLA_TARGET_NETNSID:
break;
@@ -3310,7 +3382,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct net *tgt_net = net;
struct ifinfomsg *ifm;
- char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
struct sk_buff *nskb;
@@ -3333,9 +3404,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(tgt_net);
}
- if (tb[IFLA_IFNAME])
- nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
-
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -3343,8 +3411,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
- else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(tgt_net, ifname);
+ else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ dev = rtnl_dev_get(tgt_net, tb[IFLA_IFNAME],
+ tb[IFLA_ALT_IFNAME], NULL);
else
goto out;
@@ -3374,6 +3443,100 @@ out:
return err;
}
+static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
+ bool *changed, struct netlink_ext_ack *extack)
+{
+ char *alt_ifname;
+ int err;
+
+ err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack);
+ if (err)
+ return err;
+
+ alt_ifname = nla_data(attr);
+ if (cmd == RTM_NEWLINKPROP) {
+ alt_ifname = kstrdup(alt_ifname, GFP_KERNEL);
+ if (!alt_ifname)
+ return -ENOMEM;
+ err = netdev_name_node_alt_create(dev, alt_ifname);
+ if (err) {
+ kfree(alt_ifname);
+ return err;
+ }
+ } else if (cmd == RTM_DELLINKPROP) {
+ err = netdev_name_node_alt_destroy(dev, alt_ifname);
+ if (err)
+ return err;
+ } else {
+ WARN_ON(1);
+ return 0;
+ }
+
+ *changed = true;
+ return 0;
+}
+
+static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct net_device *dev;
+ struct ifinfomsg *ifm;
+ bool changed = false;
+ struct nlattr *attr;
+ int err, rem;
+
+ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
+ if (err)
+ return err;
+
+ err = rtnl_ensure_unique_netns(tb, extack, true);
+ if (err)
+ return err;
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifi_index > 0)
+ dev = __dev_get_by_index(net, ifm->ifi_index);
+ else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
+ dev = rtnl_dev_get(net, tb[IFLA_IFNAME],
+ tb[IFLA_ALT_IFNAME], NULL);
+ else
+ return -EINVAL;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (!tb[IFLA_PROP_LIST])
+ return 0;
+
+ nla_for_each_nested(attr, tb[IFLA_PROP_LIST], rem) {
+ switch (nla_type(attr)) {
+ case IFLA_ALT_IFNAME:
+ err = rtnl_alt_ifname(cmd, dev, attr, &changed, extack);
+ if (err)
+ return err;
+ break;
+ }
+ }
+
+ if (changed)
+ netdev_state_change(dev);
+ return 0;
+}
+
+static int rtnl_newlinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ return rtnl_linkprop(RTM_NEWLINKPROP, skb, nlh, extack);
+}
+
+static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack);
+}
+
static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
@@ -5332,6 +5495,9 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);
+ rtnl_register(PF_UNSPEC, RTM_NEWLINKPROP, rtnl_newlinkprop, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0);
+
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
diff --git a/net/core/sock.c b/net/core/sock.c
index ac78a570e43a..71787f7c4f8c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -333,7 +333,6 @@ EXPORT_SYMBOL(__sk_backlog_rcv);
static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
{
struct __kernel_sock_timeval tv;
- int size;
if (timeo == MAX_SCHEDULE_TIMEOUT) {
tv.tv_sec = 0;
@@ -354,13 +353,11 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
old_tv.tv_sec = tv.tv_sec;
old_tv.tv_usec = tv.tv_usec;
*(struct __kernel_old_timeval *)optval = old_tv;
- size = sizeof(old_tv);
- } else {
- *(struct __kernel_sock_timeval *)optval = tv;
- size = sizeof(tv);
+ return sizeof(old_tv);
}
- return size;
+ *(struct __kernel_sock_timeval *)optval = tv;
+ return sizeof(tv);
}
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval)
@@ -687,7 +684,8 @@ out:
return ret;
}
-static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
+static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
+ int valbool)
{
if (valbool)
sock_set_flag(sk, bit);
@@ -3015,7 +3013,7 @@ int sock_gettstamp(struct socket *sock, void __user *userstamp,
return -ENOENT;
if (ts.tv_sec == 0) {
ktime_t kt = ktime_get_real();
- sock_write_timestamp(sk, kt);;
+ sock_write_timestamp(sk, kt);
ts = ktime_to_timespec64(kt);
}
@@ -3042,7 +3040,7 @@ int sock_gettstamp(struct socket *sock, void __user *userstamp,
}
EXPORT_SYMBOL(sock_gettstamp);
-void sock_enable_timestamp(struct sock *sk, int flag)
+void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
{
if (!sock_flag(sk, flag)) {
unsigned long previous_flags = sk->sk_flags;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index d7bf62ffbb5e..20781ad5f9c3 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
/* XDP RX runs under NAPI protection, and in different delivery error
* scenarios (e.g. queue full), it is possible to return the xdp_frame
- * while still leveraging this protection. The @napi_direct boolian
+ * while still leveraging this protection. The @napi_direct boolean
* is used for those calls sites. Thus, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 5bad08dc4316..a52e8ba1ced0 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -944,7 +944,7 @@ int inet_dccp_listen(struct socket *sock, int backlog)
if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
goto out;
- sk->sk_max_ack_backlog = backlog;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 3349ea81f901..e19a92a62e14 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1091,7 +1091,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
}
cb = DN_SKB_CB(skb);
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
if (newsk == NULL) {
release_sock(sk);
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index e4161e0c86aa..c68503a18025 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -328,7 +328,7 @@ static void dn_nsp_conn_init(struct sock *sk, struct sk_buff *skb)
return;
}
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_state_change(sk);
}
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 43120a3fb06f..17281fec710c 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -246,7 +246,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
#ifdef CONFIG_PM_SLEEP
static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
{
- return dsa_is_user_port(ds, p) && ds->ports[p].slave;
+ const struct dsa_port *dp = dsa_to_port(ds, p);
+
+ return dp->type == DSA_PORT_TYPE_USER && dp->slave;
}
int dsa_switch_suspend(struct dsa_switch *ds)
@@ -258,7 +260,7 @@ int dsa_switch_suspend(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_suspend(ds->ports[i].slave);
+ ret = dsa_slave_suspend(dsa_to_port(ds, i)->slave);
if (ret)
return ret;
}
@@ -285,7 +287,7 @@ int dsa_switch_resume(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_resume(ds->ports[i].slave);
+ ret = dsa_slave_resume(dsa_to_port(ds, i)->slave);
if (ret)
return ret;
}
@@ -329,6 +331,91 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(call_dsa_notifiers);
+int dsa_devlink_param_get(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct dsa_devlink_priv *dl_priv;
+ struct dsa_switch *ds;
+
+ dl_priv = devlink_priv(dl);
+ ds = dl_priv->ds;
+
+ if (!ds->ops->devlink_param_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_param_get(ds, id, ctx);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_param_get);
+
+int dsa_devlink_param_set(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct dsa_devlink_priv *dl_priv;
+ struct dsa_switch *ds;
+
+ dl_priv = devlink_priv(dl);
+ ds = dl_priv->ds;
+
+ if (!ds->ops->devlink_param_set)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_param_set(ds, id, ctx);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_param_set);
+
+int dsa_devlink_params_register(struct dsa_switch *ds,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ return devlink_params_register(ds->devlink, params, params_count);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_params_register);
+
+void dsa_devlink_params_unregister(struct dsa_switch *ds,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ devlink_params_unregister(ds->devlink, params, params_count);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_params_unregister);
+
+int dsa_devlink_resource_register(struct dsa_switch *ds,
+ const char *resource_name,
+ u64 resource_size,
+ u64 resource_id,
+ u64 parent_resource_id,
+ const struct devlink_resource_size_params *size_params)
+{
+ return devlink_resource_register(ds->devlink, resource_name,
+ resource_size, resource_id,
+ parent_resource_id,
+ size_params);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resource_register);
+
+void dsa_devlink_resources_unregister(struct dsa_switch *ds)
+{
+ devlink_resources_unregister(ds->devlink, NULL);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resources_unregister);
+
+void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds,
+ u64 resource_id,
+ devlink_resource_occ_get_t *occ_get,
+ void *occ_get_priv)
+{
+ return devlink_resource_occ_get_register(ds->devlink, resource_id,
+ occ_get, occ_get_priv);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_register);
+
+void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
+ u64 resource_id)
+{
+ devlink_resource_occ_get_unregister(ds->devlink, resource_id);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister);
+
static int __init dsa_init_module(void)
{
int rc;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 716d265ba8ca..9ef2caa13f27 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -45,6 +45,10 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index)
dst->index = index;
+ INIT_LIST_HEAD(&dst->rtable);
+
+ INIT_LIST_HEAD(&dst->ports);
+
INIT_LIST_HEAD(&dst->list);
list_add_tail(&dst->list, &dsa_tree_list);
@@ -111,24 +115,38 @@ static bool dsa_port_is_user(struct dsa_port *dp)
static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
struct device_node *dn)
{
- struct dsa_switch *ds;
struct dsa_port *dp;
- int device, port;
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->dn == dn)
+ return dp;
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
+ return NULL;
+}
- if (dp->dn == dn)
- return dp;
- }
- }
+struct dsa_link *dsa_link_touch(struct dsa_port *dp, struct dsa_port *link_dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst;
+ struct dsa_link *dl;
- return NULL;
+ dst = ds->dst;
+
+ list_for_each_entry(dl, &dst->rtable, list)
+ if (dl->dp == dp && dl->link_dp == link_dp)
+ return dl;
+
+ dl = kzalloc(sizeof(*dl), GFP_KERNEL);
+ if (!dl)
+ return NULL;
+
+ dl->dp = dp;
+ dl->link_dp = link_dp;
+
+ INIT_LIST_HEAD(&dl->list);
+ list_add_tail(&dl->list, &dst->rtable);
+
+ return dl;
}
static bool dsa_port_setup_routing_table(struct dsa_port *dp)
@@ -138,6 +156,7 @@ static bool dsa_port_setup_routing_table(struct dsa_port *dp)
struct device_node *dn = dp->dn;
struct of_phandle_iterator it;
struct dsa_port *link_dp;
+ struct dsa_link *dl;
int err;
of_for_each_phandle(&it, err, dn, "link", NULL, 0) {
@@ -147,24 +166,22 @@ static bool dsa_port_setup_routing_table(struct dsa_port *dp)
return false;
}
- ds->rtable[link_dp->ds->index] = dp->index;
+ dl = dsa_link_touch(dp, link_dp);
+ if (!dl) {
+ of_node_put(it.node);
+ return false;
+ }
}
return true;
}
-static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
+static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
{
bool complete = true;
struct dsa_port *dp;
- int i;
-
- for (i = 0; i < DSA_MAX_SWITCHES; i++)
- ds->rtable[i] = DSA_RTABLE_NONE;
-
- for (i = 0; i < ds->num_ports; i++) {
- dp = &ds->ports[i];
+ list_for_each_entry(dp, &dst->ports, list) {
if (dsa_port_is_dsa(dp)) {
complete = dsa_port_setup_routing_table(dp);
if (!complete)
@@ -175,81 +192,42 @@ static bool dsa_switch_setup_routing_table(struct dsa_switch *ds)
return complete;
}
-static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst)
-{
- struct dsa_switch *ds;
- bool complete = true;
- int device;
-
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
-
- complete = dsa_switch_setup_routing_table(ds);
- if (!complete)
- break;
- }
-
- return complete;
-}
-
static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
struct dsa_port *dp;
- int device, port;
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
-
- if (dsa_port_is_cpu(dp))
- return dp;
- }
- }
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_cpu(dp))
+ return dp;
return NULL;
}
static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
- struct dsa_port *dp;
- int device, port;
+ struct dsa_port *cpu_dp, *dp;
- /* DSA currently only supports a single CPU port */
- dst->cpu_dp = dsa_tree_find_first_cpu(dst);
- if (!dst->cpu_dp) {
- pr_warn("Tree has no master device\n");
+ cpu_dp = dsa_tree_find_first_cpu(dst);
+ if (!cpu_dp) {
+ pr_err("DSA: tree %d has no CPU port\n", dst->index);
return -EINVAL;
}
/* Assign the default CPU port to all ports of the fabric */
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
-
- if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
- dp->cpu_dp = dst->cpu_dp;
- }
- }
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = cpu_dp;
return 0;
}
static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
{
- /* DSA currently only supports a single CPU port */
- dst->cpu_dp = NULL;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = NULL;
}
static int dsa_port_setup(struct dsa_port *dp)
@@ -265,6 +243,9 @@ static int dsa_port_setup(struct dsa_port *dp)
bool dsa_port_enabled = false;
int err = 0;
+ if (dp->setup)
+ return 0;
+
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
dsa_port_disable(dp);
@@ -333,14 +314,21 @@ static int dsa_port_setup(struct dsa_port *dp)
dsa_port_link_unregister_of(dp);
if (err && devlink_port_registered)
devlink_port_unregister(dlp);
+ if (err)
+ return err;
- return err;
+ dp->setup = true;
+
+ return 0;
}
static void dsa_port_teardown(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
+ if (!dp->setup)
+ return;
+
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
break;
@@ -363,11 +351,17 @@ static void dsa_port_teardown(struct dsa_port *dp)
}
break;
}
+
+ dp->setup = false;
}
static int dsa_switch_setup(struct dsa_switch *ds)
{
- int err = 0;
+ struct dsa_devlink_priv *dl_priv;
+ int err;
+
+ if (ds->setup)
+ return 0;
/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
* driver and before ops->setup() has run, since the switch drivers and
@@ -379,9 +373,11 @@ static int dsa_switch_setup(struct dsa_switch *ds)
/* Add the switch to devlink before calling setup, so that setup can
* add dpipe tables
*/
- ds->devlink = devlink_alloc(&dsa_devlink_ops, 0);
+ ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv));
if (!ds->devlink)
return -ENOMEM;
+ dl_priv = devlink_priv(ds->devlink);
+ dl_priv->ds = ds;
err = devlink_register(ds->devlink, ds->dev);
if (err)
@@ -395,6 +391,8 @@ static int dsa_switch_setup(struct dsa_switch *ds)
if (err < 0)
goto unregister_notifier;
+ devlink_params_publish(ds->devlink);
+
if (!ds->slave_mii_bus && ds->ops->phy_read) {
ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus) {
@@ -409,6 +407,8 @@ static int dsa_switch_setup(struct dsa_switch *ds)
goto unregister_notifier;
}
+ ds->setup = true;
+
return 0;
unregister_notifier:
@@ -424,6 +424,9 @@ free_devlink:
static void dsa_switch_teardown(struct dsa_switch *ds)
{
+ if (!ds->setup)
+ return;
+
if (ds->slave_mii_bus && ds->ops->phy_read)
mdiobus_unregister(ds->slave_mii_bus);
@@ -438,95 +441,72 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
ds->devlink = NULL;
}
+ ds->setup = false;
}
static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
struct dsa_port *dp;
- int device, port, i;
- int err = 0;
-
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
+ int err;
- err = dsa_switch_setup(ds);
+ list_for_each_entry(dp, &dst->ports, list) {
+ err = dsa_switch_setup(dp->ds);
if (err)
- goto switch_teardown;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
+ goto teardown;
+ }
- err = dsa_port_setup(dp);
- if (err)
- goto ports_teardown;
- }
+ list_for_each_entry(dp, &dst->ports, list) {
+ err = dsa_port_setup(dp);
+ if (err)
+ goto teardown;
}
return 0;
-ports_teardown:
- for (i = 0; i < port; i++)
- dsa_port_teardown(&ds->ports[i]);
+teardown:
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_port_teardown(dp);
- dsa_switch_teardown(ds);
-
-switch_teardown:
- for (i = 0; i < device; i++) {
- ds = dst->ds[i];
- if (!ds)
- continue;
-
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
-
- dsa_port_teardown(dp);
- }
-
- dsa_switch_teardown(ds);
- }
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_switch_teardown(dp->ds);
return err;
}
static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
{
- struct dsa_switch *ds;
struct dsa_port *dp;
- int device, port;
-
- for (device = 0; device < DSA_MAX_SWITCHES; device++) {
- ds = dst->ds[device];
- if (!ds)
- continue;
- for (port = 0; port < ds->num_ports; port++) {
- dp = &ds->ports[port];
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_port_teardown(dp);
- dsa_port_teardown(dp);
- }
-
- dsa_switch_teardown(ds);
- }
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_switch_teardown(dp->ds);
}
static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
{
- struct dsa_port *cpu_dp = dst->cpu_dp;
- struct net_device *master = cpu_dp->master;
+ struct dsa_port *dp;
+ int err;
- /* DSA currently supports a single pair of CPU port and master device */
- return dsa_master_setup(master, cpu_dp);
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dsa_port_is_cpu(dp)) {
+ err = dsa_master_setup(dp->master, dp);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
}
static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
{
- struct dsa_port *cpu_dp = dst->cpu_dp;
- struct net_device *master = cpu_dp->master;
+ struct dsa_port *dp;
- return dsa_master_teardown(master);
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_cpu(dp))
+ dsa_master_teardown(dp->master);
}
static int dsa_tree_setup(struct dsa_switch_tree *dst)
@@ -572,6 +552,8 @@ teardown_default_cpu:
static void dsa_tree_teardown(struct dsa_switch_tree *dst)
{
+ struct dsa_link *dl, *next;
+
if (!dst->setup)
return;
@@ -581,39 +563,36 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
dsa_tree_teardown_default_cpu(dst);
+ list_for_each_entry_safe(dl, next, &dst->rtable, list) {
+ list_del(&dl->list);
+ kfree(dl);
+ }
+
pr_info("DSA: tree %d torn down\n", dst->index);
dst->setup = false;
}
-static void dsa_tree_remove_switch(struct dsa_switch_tree *dst,
- unsigned int index)
+static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
{
- dsa_tree_teardown(dst);
+ struct dsa_switch_tree *dst = ds->dst;
+ struct dsa_port *dp;
- dst->ds[index] = NULL;
- dsa_tree_put(dst);
-}
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->ds == ds && dp->index == index)
+ return dp;
-static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
- struct dsa_switch *ds)
-{
- unsigned int index = ds->index;
- int err;
+ dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+ if (!dp)
+ return NULL;
- if (dst->ds[index])
- return -EBUSY;
+ dp->ds = ds;
+ dp->index = index;
- dsa_tree_get(dst);
- dst->ds[index] = ds;
+ INIT_LIST_HEAD(&dp->list);
+ list_add_tail(&dp->list, &dst->ports);
- err = dsa_tree_setup(dst);
- if (err) {
- dst->ds[index] = NULL;
- dsa_tree_put(dst);
- }
-
- return err;
+ return dp;
}
static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
@@ -708,7 +687,7 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
goto out_put_node;
}
- dp = &ds->ports[reg];
+ dp = dsa_to_port(ds, reg);
err = dsa_port_parse_of(dp, port);
if (err)
@@ -732,8 +711,6 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds,
return sz;
ds->index = m[1];
- if (ds->index >= DSA_MAX_SWITCHES)
- return -EINVAL;
ds->dst = dsa_tree_touch(m[0]);
if (!ds->dst)
@@ -742,6 +719,20 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds,
return 0;
}
+static int dsa_switch_touch_ports(struct dsa_switch *ds)
+{
+ struct dsa_port *dp;
+ int port;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = dsa_port_touch(ds, port);
+ if (!dp)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
{
int err;
@@ -750,6 +741,10 @@ static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
if (err)
return err;
+ err = dsa_switch_touch_ports(ds);
+ if (err)
+ return err;
+
return dsa_switch_parse_ports_of(ds, dn);
}
@@ -787,7 +782,7 @@ static int dsa_switch_parse_ports(struct dsa_switch *ds,
for (i = 0; i < DSA_MAX_PORTS; i++) {
name = cd->port_names[i];
dev = cd->netdev[i];
- dp = &ds->ports[i];
+ dp = dsa_to_port(ds, i);
if (!name)
continue;
@@ -807,6 +802,8 @@ static int dsa_switch_parse_ports(struct dsa_switch *ds,
static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
{
+ int err;
+
ds->cd = cd;
/* We don't support interconnected switches nor multiple trees via
@@ -817,22 +814,29 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
if (!ds->dst)
return -ENOMEM;
- return dsa_switch_parse_ports(ds, cd);
-}
-
-static int dsa_switch_add(struct dsa_switch *ds)
-{
- struct dsa_switch_tree *dst = ds->dst;
+ err = dsa_switch_touch_ports(ds);
+ if (err)
+ return err;
- return dsa_tree_add_switch(dst, ds);
+ return dsa_switch_parse_ports(ds, cd);
}
static int dsa_switch_probe(struct dsa_switch *ds)
{
- struct dsa_chip_data *pdata = ds->dev->platform_data;
- struct device_node *np = ds->dev->of_node;
+ struct dsa_switch_tree *dst;
+ struct dsa_chip_data *pdata;
+ struct device_node *np;
int err;
+ if (!ds->dev)
+ return -ENODEV;
+
+ pdata = ds->dev->platform_data;
+ np = ds->dev->of_node;
+
+ if (!ds->num_ports)
+ return -EINVAL;
+
if (np)
err = dsa_switch_parse_of(ds, np);
else if (pdata)
@@ -843,29 +847,14 @@ static int dsa_switch_probe(struct dsa_switch *ds)
if (err)
return err;
- return dsa_switch_add(ds);
-}
-
-struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
-{
- struct dsa_switch *ds;
- int i;
-
- ds = devm_kzalloc(dev, struct_size(ds, ports, n), GFP_KERNEL);
- if (!ds)
- return NULL;
-
- ds->dev = dev;
- ds->num_ports = n;
-
- for (i = 0; i < ds->num_ports; ++i) {
- ds->ports[i].index = i;
- ds->ports[i].ds = ds;
- }
+ dst = ds->dst;
+ dsa_tree_get(dst);
+ err = dsa_tree_setup(dst);
+ if (err)
+ dsa_tree_put(dst);
- return ds;
+ return err;
}
-EXPORT_SYMBOL_GPL(dsa_switch_alloc);
int dsa_register_switch(struct dsa_switch *ds)
{
@@ -883,9 +872,16 @@ EXPORT_SYMBOL_GPL(dsa_register_switch);
static void dsa_switch_remove(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
- unsigned int index = ds->index;
+ struct dsa_port *dp, *next;
- dsa_tree_remove_switch(dst, index);
+ dsa_tree_teardown(dst);
+
+ list_for_each_entry_safe(dp, next, &dst->ports, list) {
+ list_del(&dp->list);
+ kfree(dp);
+ }
+
+ dsa_tree_put(dst);
}
void dsa_unregister_switch(struct dsa_switch *ds)
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 12f8c7ee4dd8..53e7577896b6 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -104,25 +104,14 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->dst;
- struct dsa_switch *ds;
- struct dsa_port *slave_port;
+ struct dsa_port *dp;
- if (device < 0 || device >= DSA_MAX_SWITCHES)
- return NULL;
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->ds->index == device && dp->index == port &&
+ dp->type == DSA_PORT_TYPE_USER)
+ return dp->slave;
- ds = dst->ds[device];
- if (!ds)
- return NULL;
-
- if (port < 0 || port >= ds->num_ports)
- return NULL;
-
- slave_port = &ds->ports[port];
-
- if (unlikely(slave_port->type != DSA_PORT_TYPE_USER))
- return NULL;
-
- return slave_port->slave;
+ return NULL;
}
/* port.c */
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 9b54e5a76297..6e93c36bf0c0 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -561,7 +561,7 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
struct dsa_switch *ds = dp->ds;
struct phy_device *phydev;
int port = dp->index;
- int mode;
+ phy_interface_t mode;
int err;
err = of_phy_register_fixed_link(dn);
@@ -574,8 +574,8 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
phydev = of_phy_find_device(dn);
- mode = of_get_phy_mode(dn);
- if (mode < 0)
+ err = of_get_phy_mode(dn, &mode);
+ if (err)
mode = PHY_INTERFACE_MODE_NA;
phydev->interface = mode;
@@ -593,10 +593,11 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
{
struct dsa_switch *ds = dp->ds;
struct device_node *port_dn = dp->dn;
- int mode, err;
+ phy_interface_t mode;
+ int err;
- mode = of_get_phy_mode(port_dn);
- if (mode < 0)
+ err = of_get_phy_mode(port_dn, &mode);
+ if (err)
mode = PHY_INTERFACE_MODE_NA;
dp->pl_config.dev = ds->dev;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 028e65f4b5ba..78ffc87dc25e 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -789,6 +789,22 @@ static int dsa_slave_set_link_ksettings(struct net_device *dev,
return phylink_ethtool_ksettings_set(dp->pl, cmd);
}
+static void dsa_slave_get_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ phylink_ethtool_get_pauseparam(dp->pl, pause);
+}
+
+static int dsa_slave_set_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return phylink_ethtool_set_pauseparam(dp->pl, pause);
+}
+
#ifdef CONFIG_NET_POLL_CONTROLLER
static int dsa_slave_netpoll_setup(struct net_device *dev,
struct netpoll_info *ni)
@@ -1192,6 +1208,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_eee = dsa_slave_get_eee,
.get_link_ksettings = dsa_slave_get_link_ksettings,
.set_link_ksettings = dsa_slave_set_link_ksettings,
+ .get_pauseparam = dsa_slave_get_pauseparam,
+ .set_pauseparam = dsa_slave_set_pauseparam,
.get_rxnfc = dsa_slave_get_rxnfc,
.set_rxnfc = dsa_slave_set_rxnfc,
.get_ts_info = dsa_slave_get_ts_info,
@@ -1295,11 +1313,12 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
struct dsa_port *dp = dsa_slave_to_port(slave_dev);
struct device_node *port_dn = dp->dn;
struct dsa_switch *ds = dp->ds;
+ phy_interface_t mode;
u32 phy_flags = 0;
- int mode, ret;
+ int ret;
- mode = of_get_phy_mode(port_dn);
- if (mode < 0)
+ ret = of_get_phy_mode(port_dn, &mode);
+ if (ret)
mode = PHY_INTERFACE_MODE_NA;
dp->pl_config.dev = &slave_dev->dev;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 6a9607518823..df4abe897ed6 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -20,7 +20,7 @@ static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
int i;
for (i = 0; i < ds->num_ports; ++i) {
- struct dsa_port *dp = &ds->ports[i];
+ struct dsa_port *dp = dsa_to_port(ds, i);
if (dp->ageing_time && dp->ageing_time < ageing_time)
ageing_time = dp->ageing_time;
@@ -98,7 +98,7 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
if (unset_vlan_filtering) {
struct switchdev_trans trans = {0};
- err = dsa_port_vlan_filtering(&ds->ports[info->port],
+ err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port),
false, &trans);
if (err && err != EOPNOTSUPP)
return err;
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 9c1cc2482b68..bc5cb91bf052 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -31,15 +31,14 @@
* Must be transmitted as zero and ignored on receive.
*
* SWITCH_ID - VID[8:6]:
- * Index of switch within DSA tree. Must be between 0 and
- * DSA_MAX_SWITCHES - 1.
+ * Index of switch within DSA tree. Must be between 0 and 7.
*
* RSV - VID[5:4]:
* To be used for further expansion of PORT or for other purposes.
* Must be transmitted as zero and ignored on receive.
*
* PORT - VID[3:0]:
- * Index of switch port. Must be between 0 and DSA_MAX_PORTS - 1.
+ * Index of switch port. Must be between 0 and 15.
*/
#define DSA_8021Q_DIR_SHIFT 10
@@ -103,7 +102,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
if (!dsa_is_user_port(ds, port))
return 0;
- slave = ds->ports[port].slave;
+ slave = dsa_to_port(ds, port)->slave;
err = br_vlan_get_pvid(slave, &pvid);
if (err < 0)
@@ -118,7 +117,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
return err;
}
- return dsa_port_vid_add(&ds->ports[port], pvid, vinfo.flags);
+ return dsa_port_vid_add(dsa_to_port(ds, port), pvid, vinfo.flags);
}
/* If @enabled is true, installs @vid with @flags into the switch port's HW
@@ -130,7 +129,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid,
u16 flags, bool enabled)
{
- struct dsa_port *dp = &ds->ports[port];
+ struct dsa_port *dp = dsa_to_port(ds, port);
struct bridge_vlan_info vinfo;
int err;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 17374afee28f..9040fe55e0f5 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -244,7 +244,12 @@ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16
eth->h_proto = type;
memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
memcpy(eth->h_dest, neigh->ha, ETH_ALEN);
- hh->hh_len = ETH_HLEN;
+
+ /* Pairs with READ_ONCE() in neigh_resolve_output(),
+ * neigh_hh_output() and neigh_update_hhs().
+ */
+ smp_store_release(&hh->hh_len, ETH_HLEN);
+
return 0;
}
EXPORT_SYMBOL(eth_header_cache);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index ffcfcef76291..7c5a1aa5adb4 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -236,21 +236,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
struct cfg802154_registered_device **rdev,
struct wpan_dev **wpan_dev)
{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
int err;
rtnl_lock();
if (!cb->args[0]) {
- err = nlmsg_parse_deprecated(cb->nlh,
- GENL_HDRLEN + nl802154_fam.hdrsize,
- genl_family_attrbuf(&nl802154_fam),
- nl802154_fam.maxattr,
- nl802154_policy, NULL);
- if (err)
- goto out_unlock;
-
*wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
- genl_family_attrbuf(&nl802154_fam));
+ info->attrs);
if (IS_ERR(*wpan_dev)) {
err = PTR_ERR(*wpan_dev);
goto out_unlock;
@@ -557,17 +550,8 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
struct netlink_callback *cb,
struct nl802154_dump_wpan_phy_state *state)
{
- struct nlattr **tb = genl_family_attrbuf(&nl802154_fam);
- int ret = nlmsg_parse_deprecated(cb->nlh,
- GENL_HDRLEN + nl802154_fam.hdrsize,
- tb, nl802154_fam.maxattr,
- nl802154_policy, NULL);
-
- /* TODO check if we can handle error here,
- * we have no backward compatibility
- */
- if (ret)
- return 0;
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct nlattr **tb = info->attrs;
if (tb[NL802154_ATTR_WPAN_PHY])
state->filter_wpan_phy = nla_get_u32(tb[NL802154_ATTR_WPAN_PHY]);
@@ -2203,7 +2187,8 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
static const struct genl_ops nl802154_ops[] = {
{
.cmd = NL802154_CMD_GET_WPAN_PHY,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.doit = nl802154_get_wpan_phy,
.dumpit = nl802154_dump_wpan_phy,
.done = nl802154_dump_wpan_phy_done,
@@ -2343,7 +2328,8 @@ static const struct genl_ops nl802154_ops[] = {
},
{
.cmd = NL802154_CMD_GET_SEC_KEY,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
/* TODO .doit by matching key id? */
.dumpit = nl802154_dump_llsec_key,
.flags = GENL_ADMIN_PERM,
@@ -2369,7 +2355,8 @@ static const struct genl_ops nl802154_ops[] = {
/* TODO unique identifier must short+pan OR extended_addr */
{
.cmd = NL802154_CMD_GET_SEC_DEV,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
/* TODO .doit by matching extended_addr? */
.dumpit = nl802154_dump_llsec_dev,
.flags = GENL_ADMIN_PERM,
@@ -2395,7 +2382,8 @@ static const struct genl_ops nl802154_ops[] = {
/* TODO remove complete devkey, put it as nested? */
{
.cmd = NL802154_CMD_GET_SEC_DEVKEY,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
/* TODO doit by matching ??? */
.dumpit = nl802154_dump_llsec_devkey,
.flags = GENL_ADMIN_PERM,
@@ -2420,7 +2408,8 @@ static const struct genl_ops nl802154_ops[] = {
},
{
.cmd = NL802154_CMD_GET_SEC_LEVEL,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
/* TODO .doit by matching frame_type? */
.dumpit = nl802154_dump_llsec_seclevel,
.flags = GENL_ADMIN_PERM,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 70f92aaca411..53de8e00990e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -208,7 +208,7 @@ int inet_listen(struct socket *sock, int backlog)
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto out;
- sk->sk_max_ack_backlog = backlog;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index b804ccbdb241..0c28bd469a68 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -9,12 +9,12 @@
#include <net/netns/ipv4.h>
#include <net/ip_fib.h>
-int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+int call_fib4_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
info->family = AF_INET;
- return call_fib_notifier(nb, net, event_type, info);
+ return call_fib_notifier(nb, event_type, info);
}
int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
@@ -34,17 +34,16 @@ static unsigned int fib4_seq_read(struct net *net)
return net->ipv4.fib_seq + fib4_rules_seq_read(net);
}
-static int fib4_dump(struct net *net, struct notifier_block *nb)
+static int fib4_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
int err;
- err = fib4_rules_dump(net, nb);
+ err = fib4_rules_dump(net, nb, extack);
if (err)
return err;
- fib_notify(net, nb);
-
- return 0;
+ return fib_notify(net, nb, extack);
}
static const struct fib_notifier_ops fib4_notifier_ops_template = {
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index b43a7ba5c6a4..f99e3bac5cab 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -65,9 +65,10 @@ bool fib4_rule_default(const struct fib_rule *rule)
}
EXPORT_SYMBOL_GPL(fib4_rule_default);
-int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+int fib4_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- return fib_rules_dump(net, nb, AF_INET);
+ return fib_rules_dump(net, nb, AF_INET, extack);
}
unsigned int fib4_rules_seq_read(struct net *net)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1ab2fb6bb37d..b9df9c09b84e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -74,11 +74,13 @@
#include <trace/events/fib.h>
#include "fib_lookup.h"
-static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
+static int call_fib_entry_notifier(struct notifier_block *nb,
enum fib_event_type event_type, u32 dst,
- int dst_len, struct fib_alias *fa)
+ int dst_len, struct fib_alias *fa,
+ struct netlink_ext_ack *extack)
{
struct fib_entry_notifier_info info = {
+ .info.extack = extack,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
@@ -86,7 +88,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
- return call_fib4_notifier(nb, net, event_type, &info.info);
+ return call_fib4_notifier(nb, event_type, &info.info);
}
static int call_fib_entry_notifiers(struct net *net,
@@ -2015,10 +2017,12 @@ void fib_info_notify_update(struct net *net, struct nl_info *info)
}
}
-static void fib_leaf_notify(struct net *net, struct key_vector *l,
- struct fib_table *tb, struct notifier_block *nb)
+static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
struct fib_alias *fa;
+ int err;
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
struct fib_info *fi = fa->fa_info;
@@ -2032,39 +2036,53 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
if (tb->tb_id != fa->tb_id)
continue;
- call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
- KEYLENGTH - fa->fa_slen, fa);
+ err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_ADD, l->key,
+ KEYLENGTH - fa->fa_slen,
+ fa, extack);
+ if (err)
+ return err;
}
+ return 0;
}
-static void fib_table_notify(struct net *net, struct fib_table *tb,
- struct notifier_block *nb)
+static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
struct trie *t = (struct trie *)tb->tb_data;
struct key_vector *l, *tp = t->kv;
t_key key = 0;
+ int err;
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
- fib_leaf_notify(net, l, tb, nb);
+ err = fib_leaf_notify(l, tb, nb, extack);
+ if (err)
+ return err;
key = l->key + 1;
/* stop in case of wrap around */
if (key < l->key)
break;
}
+ return 0;
}
-void fib_notify(struct net *net, struct notifier_block *nb)
+int fib_notify(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
unsigned int h;
+ int err;
for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv4.fib_table_hash[h];
struct fib_table *tb;
- hlist_for_each_entry_rcu(tb, head, tb_hlist)
- fib_table_notify(net, tb, nb);
+ hlist_for_each_entry_rcu(tb, head, tb_hlist) {
+ err = fib_table_notify(tb, nb, extack);
+ if (err)
+ return err;
+ }
}
+ return 0;
}
static void __trie_free_rcu(struct rcu_head *head)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4298aae74e0e..18068ed42f25 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -249,10 +249,11 @@ bool icmp_global_allow(void)
bool rc = false;
/* Check if token bucket is empty and cannot be refilled
- * without taking the spinlock.
+ * without taking the spinlock. The READ_ONCE() are paired
+ * with the following WRITE_ONCE() in this same function.
*/
- if (!icmp_global.credit) {
- delta = min_t(u32, now - icmp_global.stamp, HZ);
+ if (!READ_ONCE(icmp_global.credit)) {
+ delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
if (delta < HZ / 50)
return false;
}
@@ -262,14 +263,14 @@ bool icmp_global_allow(void)
if (delta >= HZ / 50) {
incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
if (incr)
- icmp_global.stamp = now;
+ WRITE_ONCE(icmp_global.stamp, now);
}
credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
if (credit) {
credit--;
rc = true;
}
- icmp_global.credit = credit;
+ WRITE_ONCE(icmp_global.credit, credit);
spin_unlock(&icmp_global.lock);
return rc;
}
@@ -682,7 +683,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
if (dev)
- saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
+ saddr = inet_select_addr(dev, iph->saddr,
+ RT_SCOPE_LINK);
else
saddr = 0;
rcu_read_unlock();
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 480d0b22db1a..3b9c7a2725a9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1563,7 +1563,7 @@ static int ip_mc_check_igmp_msg(struct sk_buff *skb)
}
}
-static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb)
+static __sum16 ip_mc_validate_checksum(struct sk_buff *skb)
{
return skb_checksum_simple_validate(skb);
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index eb30fc1770de..e4c6e8b40490 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -716,7 +716,7 @@ static void reqsk_timer_handler(struct timer_list *t)
* ones are about to clog our table.
*/
qlen = reqsk_queue_len(queue);
- if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
+ if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
int young = reqsk_queue_len_young(queue) << 1;
while (thresh > 2) {
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7dc79b973e6e..af154977904c 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -226,17 +226,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
r->idiag_timer = 1;
r->idiag_retrans = icsk->icsk_retransmits;
r->idiag_expires =
- jiffies_to_msecs(icsk->icsk_timeout - jiffies);
+ jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
r->idiag_timer = 4;
r->idiag_retrans = icsk->icsk_probes_out;
r->idiag_expires =
- jiffies_to_msecs(icsk->icsk_timeout - jiffies);
+ jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
} else if (timer_pending(&sk->sk_timer)) {
r->idiag_timer = 2;
r->idiag_retrans = icsk->icsk_probes_out;
r->idiag_expires =
- jiffies_to_msecs(sk->sk_timer.expires - jiffies);
+ jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
} else {
r->idiag_timer = 0;
r->idiag_expires = 0;
@@ -342,16 +342,13 @@ static int inet_twsk_diag_fill(struct sock *sk,
r = nlmsg_data(nlh);
BUG_ON(tw->tw_state != TCP_TIME_WAIT);
- tmo = tw->tw_timer.expires - jiffies;
- if (tmo < 0)
- tmo = 0;
-
inet_diag_msg_common_fill(r, sk);
r->idiag_retrans = 0;
r->idiag_state = tw->tw_substate;
r->idiag_timer = 3;
- r->idiag_expires = jiffies_to_msecs(tmo);
+ tmo = tw->tw_timer.expires - jiffies;
+ r->idiag_expires = jiffies_delta_to_msecs(tmo);
r->idiag_rqueue = 0;
r->idiag_wqueue = 0;
r->idiag_uid = 0;
@@ -385,7 +382,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
offsetof(struct sock, sk_cookie));
tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
- r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0;
+ r->idiag_expires = jiffies_delta_to_msecs(tmo);
r->idiag_rqueue = 0;
r->idiag_wqueue = 0;
r->idiag_uid = 0;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index be778599bfed..ff327a62c9ce 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -160,7 +160,12 @@ static void inet_peer_gc(struct inet_peer_base *base,
base->total / inet_peer_threshold * HZ;
for (i = 0; i < gc_cnt; i++) {
p = gc_stack[i];
- delta = (__u32)jiffies - p->dtime;
+
+ /* The READ_ONCE() pairs with the WRITE_ONCE()
+ * in inet_putpeer()
+ */
+ delta = (__u32)jiffies - READ_ONCE(p->dtime);
+
if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
gc_stack[i] = NULL;
}
@@ -237,7 +242,10 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
void inet_putpeer(struct inet_peer *p)
{
- p->dtime = (__u32)jiffies;
+ /* The WRITE_ONCE() pairs with itself (we run lockless)
+ * and the READ_ONCE() in inet_peer_gc()
+ */
+ WRITE_ONCE(p->dtime, (__u32)jiffies);
if (refcount_dec_and_test(&p->refcnt))
call_rcu(&p->rcu, inetpeer_free_rcu);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c59a78a267c3..24a95126e698 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -611,5 +611,6 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
list_add_tail(&skb->list, &sublist);
}
/* dispatch final sublist */
- ip_sublist_rcv(&sublist, curr_dev, curr_net);
+ if (!list_empty(&sublist))
+ ip_sublist_rcv(&sublist, curr_dev, curr_net);
}
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 1452a97914a0..d4f84bf9289a 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -34,6 +34,9 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/dst_metadata.h>
+#include <net/geneve.h>
+#include <net/vxlan.h>
+#include <net/erspan.h>
const struct ip_tunnel_encap_ops __rcu *
iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
@@ -126,15 +129,14 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
if (!md || md->type != METADATA_IP_TUNNEL ||
md->u.tun_info.mode & IP_TUNNEL_INFO_TX)
-
return NULL;
- res = metadata_dst_alloc(0, METADATA_IP_TUNNEL, flags);
+ src = &md->u.tun_info;
+ res = metadata_dst_alloc(src->options_len, METADATA_IP_TUNNEL, flags);
if (!res)
return NULL;
dst = &res->u.tun_info;
- src = &md->u.tun_info;
dst->key.tun_id = src->key.tun_id;
if (src->mode & IP_TUNNEL_INFO_IPV6)
memcpy(&dst->key.u.ipv6.dst, &src->key.u.ipv6.src,
@@ -143,6 +145,8 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
dst->key.u.ipv4.dst = src->key.u.ipv4.src;
dst->key.tun_flags = src->key.tun_flags;
dst->mode = src->mode | IP_TUNNEL_INFO_TX;
+ ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src),
+ src->options_len, 0);
return res;
}
@@ -217,24 +221,199 @@ static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
[LWTUNNEL_IP_TTL] = { .type = NLA_U8 },
[LWTUNNEL_IP_TOS] = { .type = NLA_U8 },
[LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 },
+ [LWTUNNEL_IP_OPTS] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ip_opts_policy[LWTUNNEL_IP_OPTS_MAX + 1] = {
+ [LWTUNNEL_IP_OPTS_GENEVE] = { .type = NLA_NESTED },
+ [LWTUNNEL_IP_OPTS_VXLAN] = { .type = NLA_NESTED },
+ [LWTUNNEL_IP_OPTS_ERSPAN] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+geneve_opt_policy[LWTUNNEL_IP_OPT_GENEVE_MAX + 1] = {
+ [LWTUNNEL_IP_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
+ [LWTUNNEL_IP_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
+ [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 },
+};
+
+static const struct nla_policy
+vxlan_opt_policy[LWTUNNEL_IP_OPT_VXLAN_MAX + 1] = {
+ [LWTUNNEL_IP_OPT_VXLAN_GBP] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy
+erspan_opt_policy[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1] = {
+ [LWTUNNEL_IP_OPT_ERSPAN_VER] = { .type = NLA_U8 },
+ [LWTUNNEL_IP_OPT_ERSPAN_INDEX] = { .type = NLA_U32 },
+ [LWTUNNEL_IP_OPT_ERSPAN_DIR] = { .type = NLA_U8 },
+ [LWTUNNEL_IP_OPT_ERSPAN_HWID] = { .type = NLA_U8 },
};
+static int ip_tun_parse_opts_geneve(struct nlattr *attr,
+ struct ip_tunnel_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[LWTUNNEL_IP_OPT_GENEVE_MAX + 1];
+ int data_len, err;
+
+ err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPT_GENEVE_MAX,
+ attr, geneve_opt_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[LWTUNNEL_IP_OPT_GENEVE_CLASS] ||
+ !tb[LWTUNNEL_IP_OPT_GENEVE_TYPE] ||
+ !tb[LWTUNNEL_IP_OPT_GENEVE_DATA])
+ return -EINVAL;
+
+ attr = tb[LWTUNNEL_IP_OPT_GENEVE_DATA];
+ data_len = nla_len(attr);
+ if (data_len % 4)
+ return -EINVAL;
+
+ if (info) {
+ struct geneve_opt *opt = ip_tunnel_info_opts(info);
+
+ memcpy(opt->opt_data, nla_data(attr), data_len);
+ opt->length = data_len / 4;
+ attr = tb[LWTUNNEL_IP_OPT_GENEVE_CLASS];
+ opt->opt_class = nla_get_be16(attr);
+ attr = tb[LWTUNNEL_IP_OPT_GENEVE_TYPE];
+ opt->type = nla_get_u8(attr);
+ info->key.tun_flags |= TUNNEL_GENEVE_OPT;
+ }
+
+ return sizeof(struct geneve_opt) + data_len;
+}
+
+static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
+ struct ip_tunnel_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[LWTUNNEL_IP_OPT_VXLAN_MAX + 1];
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPT_VXLAN_MAX,
+ attr, vxlan_opt_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[LWTUNNEL_IP_OPT_VXLAN_GBP])
+ return -EINVAL;
+
+ if (info) {
+ struct vxlan_metadata *md = ip_tunnel_info_opts(info);
+
+ attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP];
+ md->gbp = nla_get_u32(attr);
+ info->key.tun_flags |= TUNNEL_VXLAN_OPT;
+ }
+
+ return sizeof(struct vxlan_metadata);
+}
+
+static int ip_tun_parse_opts_erspan(struct nlattr *attr,
+ struct ip_tunnel_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1];
+ int err;
+
+ err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPT_ERSPAN_MAX,
+ attr, erspan_opt_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[LWTUNNEL_IP_OPT_ERSPAN_VER])
+ return -EINVAL;
+
+ if (info) {
+ struct erspan_metadata *md = ip_tunnel_info_opts(info);
+
+ attr = tb[LWTUNNEL_IP_OPT_ERSPAN_VER];
+ md->version = nla_get_u8(attr);
+
+ if (md->version == 1 && tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX]) {
+ attr = tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX];
+ md->u.index = nla_get_be32(attr);
+ } else if (md->version == 2 && tb[LWTUNNEL_IP_OPT_ERSPAN_DIR] &&
+ tb[LWTUNNEL_IP_OPT_ERSPAN_HWID]) {
+ attr = tb[LWTUNNEL_IP_OPT_ERSPAN_DIR];
+ md->u.md2.dir = nla_get_u8(attr);
+ attr = tb[LWTUNNEL_IP_OPT_ERSPAN_HWID];
+ set_hwid(&md->u.md2, nla_get_u8(attr));
+ } else {
+ return -EINVAL;
+ }
+
+ info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ }
+
+ return sizeof(struct erspan_metadata);
+}
+
+static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[LWTUNNEL_IP_OPTS_MAX + 1];
+ int err;
+
+ if (!attr)
+ return 0;
+
+ err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_OPTS_MAX, attr,
+ ip_opts_policy, extack);
+ if (err)
+ return err;
+
+ if (tb[LWTUNNEL_IP_OPTS_GENEVE])
+ err = ip_tun_parse_opts_geneve(tb[LWTUNNEL_IP_OPTS_GENEVE],
+ info, extack);
+ else if (tb[LWTUNNEL_IP_OPTS_VXLAN])
+ err = ip_tun_parse_opts_vxlan(tb[LWTUNNEL_IP_OPTS_VXLAN],
+ info, extack);
+ else if (tb[LWTUNNEL_IP_OPTS_ERSPAN])
+ err = ip_tun_parse_opts_erspan(tb[LWTUNNEL_IP_OPTS_ERSPAN],
+ info, extack);
+ else
+ err = -EINVAL;
+
+ return err;
+}
+
+static int ip_tun_get_optlen(struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ return ip_tun_parse_opts(attr, NULL, extack);
+}
+
+static int ip_tun_set_opts(struct nlattr *attr, struct ip_tunnel_info *info,
+ struct netlink_ext_ack *extack)
+{
+ return ip_tun_parse_opts(attr, info, extack);
+}
+
static int ip_tun_build_state(struct nlattr *attr,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts,
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_info *tun_info;
- struct lwtunnel_state *new_state;
struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
- int err;
+ struct lwtunnel_state *new_state;
+ struct ip_tunnel_info *tun_info;
+ int err, opt_len;
err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_MAX, attr,
ip_tun_policy, extack);
if (err < 0)
return err;
- new_state = lwtunnel_state_alloc(sizeof(*tun_info));
+ opt_len = ip_tun_get_optlen(tb[LWTUNNEL_IP_OPTS], extack);
+ if (opt_len < 0)
+ return opt_len;
+
+ new_state = lwtunnel_state_alloc(sizeof(*tun_info) + opt_len);
if (!new_state)
return -ENOMEM;
@@ -242,6 +421,12 @@ static int ip_tun_build_state(struct nlattr *attr,
tun_info = lwt_tun_info(new_state);
+ err = ip_tun_set_opts(tb[LWTUNNEL_IP_OPTS], tun_info, extack);
+ if (err < 0) {
+ lwtstate_free(new_state);
+ return err;
+ }
+
#ifdef CONFIG_DST_CACHE
err = dst_cache_init(&tun_info->dst_cache, GFP_KERNEL);
if (err) {
@@ -266,10 +451,10 @@ static int ip_tun_build_state(struct nlattr *attr,
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);
if (tb[LWTUNNEL_IP_FLAGS])
- tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP_FLAGS]);
+ tun_info->key.tun_flags |= nla_get_be16(tb[LWTUNNEL_IP_FLAGS]);
tun_info->mode = IP_TUNNEL_INFO_TX;
- tun_info->options_len = 0;
+ tun_info->options_len = opt_len;
*ts = new_state;
@@ -285,6 +470,110 @@ static void ip_tun_destroy_state(struct lwtunnel_state *lwtstate)
#endif
}
+static int ip_tun_fill_encap_opts_geneve(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
+{
+ struct geneve_opt *opt;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_GENEVE);
+ if (!nest)
+ return -ENOMEM;
+
+ opt = ip_tunnel_info_opts(tun_info);
+ if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS, opt->opt_class) ||
+ nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) ||
+ nla_put(skb, LWTUNNEL_IP_OPT_GENEVE_DATA, opt->length * 4,
+ opt->opt_data)) {
+ nla_nest_cancel(skb, nest);
+ return -ENOMEM;
+ }
+
+ nla_nest_end(skb, nest);
+ return 0;
+}
+
+static int ip_tun_fill_encap_opts_vxlan(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
+{
+ struct vxlan_metadata *md;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_VXLAN);
+ if (!nest)
+ return -ENOMEM;
+
+ md = ip_tunnel_info_opts(tun_info);
+ if (nla_put_u32(skb, LWTUNNEL_IP_OPT_VXLAN_GBP, md->gbp)) {
+ nla_nest_cancel(skb, nest);
+ return -ENOMEM;
+ }
+
+ nla_nest_end(skb, nest);
+ return 0;
+}
+
+static int ip_tun_fill_encap_opts_erspan(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
+{
+ struct erspan_metadata *md;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_ERSPAN);
+ if (!nest)
+ return -ENOMEM;
+
+ md = ip_tunnel_info_opts(tun_info);
+ if (nla_put_u32(skb, LWTUNNEL_IP_OPT_ERSPAN_VER, md->version))
+ goto err;
+
+ if (md->version == 1 &&
+ nla_put_be32(skb, LWTUNNEL_IP_OPT_ERSPAN_INDEX, md->u.index))
+ goto err;
+
+ if (md->version == 2 &&
+ (nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_DIR, md->u.md2.dir) ||
+ nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_HWID,
+ get_hwid(&md->u.md2))))
+ goto err;
+
+ nla_nest_end(skb, nest);
+ return 0;
+err:
+ nla_nest_cancel(skb, nest);
+ return -ENOMEM;
+}
+
+static int ip_tun_fill_encap_opts(struct sk_buff *skb, int type,
+ struct ip_tunnel_info *tun_info)
+{
+ struct nlattr *nest;
+ int err = 0;
+
+ if (!(tun_info->key.tun_flags &
+ (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)))
+ return 0;
+
+ nest = nla_nest_start_noflag(skb, type);
+ if (!nest)
+ return -ENOMEM;
+
+ if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT)
+ err = ip_tun_fill_encap_opts_geneve(skb, tun_info);
+ else if (tun_info->key.tun_flags & TUNNEL_VXLAN_OPT)
+ err = ip_tun_fill_encap_opts_vxlan(skb, tun_info);
+ else if (tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)
+ err = ip_tun_fill_encap_opts_erspan(skb, tun_info);
+
+ if (err) {
+ nla_nest_cancel(skb, nest);
+ return err;
+ }
+
+ nla_nest_end(skb, nest);
+ return 0;
+}
+
static int ip_tun_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
{
@@ -296,12 +585,42 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb,
nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
- nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags))
+ nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags) ||
+ ip_tun_fill_encap_opts(skb, LWTUNNEL_IP_OPTS, tun_info))
return -ENOMEM;
return 0;
}
+static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
+{
+ int opt_len;
+
+ if (!(info->key.tun_flags &
+ (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)))
+ return 0;
+
+ opt_len = nla_total_size(0); /* LWTUNNEL_IP_OPTS */
+ if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+ struct geneve_opt *opt = ip_tunnel_info_opts(info);
+
+ opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_GENEVE */
+ + nla_total_size(2) /* OPT_GENEVE_CLASS */
+ + nla_total_size(1) /* OPT_GENEVE_TYPE */
+ + nla_total_size(opt->length * 4);
+ /* OPT_GENEVE_DATA */
+ } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+ opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */
+ + nla_total_size(4); /* OPT_VXLAN_GBP */
+ } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) {
+ opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_ERSPAN */
+ + nla_total_size(1) /* OPT_ERSPAN_VER */
+ + nla_total_size(4); /* OPT_ERSPAN_INDEX/DIR/HWID */
+ }
+
+ return opt_len;
+}
+
static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
{
return nla_total_size_64bit(8) /* LWTUNNEL_IP_ID */
@@ -309,13 +628,21 @@ static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
+ nla_total_size(4) /* LWTUNNEL_IP_SRC */
+ nla_total_size(1) /* LWTUNNEL_IP_TOS */
+ nla_total_size(1) /* LWTUNNEL_IP_TTL */
- + nla_total_size(2); /* LWTUNNEL_IP_FLAGS */
+ + nla_total_size(2) /* LWTUNNEL_IP_FLAGS */
+ + ip_tun_opts_nlsize(lwt_tun_info(lwtstate));
+ /* LWTUNNEL_IP_OPTS */
}
static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
- return memcmp(lwt_tun_info(a), lwt_tun_info(b),
- sizeof(struct ip_tunnel_info));
+ struct ip_tunnel_info *info_a = lwt_tun_info(a);
+ struct ip_tunnel_info *info_b = lwt_tun_info(b);
+
+ return memcmp(info_a, info_b, sizeof(info_a->key)) ||
+ info_a->mode != info_b->mode ||
+ info_a->options_len != info_b->options_len ||
+ memcmp(ip_tunnel_info_opts(info_a),
+ ip_tunnel_info_opts(info_b), info_a->options_len);
}
static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
@@ -341,17 +668,21 @@ static int ip6_tun_build_state(struct nlattr *attr,
struct lwtunnel_state **ts,
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_info *tun_info;
- struct lwtunnel_state *new_state;
struct nlattr *tb[LWTUNNEL_IP6_MAX + 1];
- int err;
+ struct lwtunnel_state *new_state;
+ struct ip_tunnel_info *tun_info;
+ int err, opt_len;
err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP6_MAX, attr,
ip6_tun_policy, extack);
if (err < 0)
return err;
- new_state = lwtunnel_state_alloc(sizeof(*tun_info));
+ opt_len = ip_tun_get_optlen(tb[LWTUNNEL_IP6_OPTS], extack);
+ if (opt_len < 0)
+ return opt_len;
+
+ new_state = lwtunnel_state_alloc(sizeof(*tun_info) + opt_len);
if (!new_state)
return -ENOMEM;
@@ -359,6 +690,12 @@ static int ip6_tun_build_state(struct nlattr *attr,
tun_info = lwt_tun_info(new_state);
+ err = ip_tun_set_opts(tb[LWTUNNEL_IP6_OPTS], tun_info, extack);
+ if (err < 0) {
+ lwtstate_free(new_state);
+ return err;
+ }
+
if (tb[LWTUNNEL_IP6_ID])
tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP6_ID]);
@@ -375,10 +712,10 @@ static int ip6_tun_build_state(struct nlattr *attr,
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);
if (tb[LWTUNNEL_IP6_FLAGS])
- tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]);
+ tun_info->key.tun_flags |= nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]);
tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6;
- tun_info->options_len = 0;
+ tun_info->options_len = opt_len;
*ts = new_state;
@@ -396,7 +733,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb,
nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) ||
- nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
+ nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags) ||
+ ip_tun_fill_encap_opts(skb, LWTUNNEL_IP6_OPTS, tun_info))
return -ENOMEM;
return 0;
@@ -409,7 +747,9 @@ static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
+ nla_total_size(16) /* LWTUNNEL_IP6_SRC */
+ nla_total_size(1) /* LWTUNNEL_IP6_HOPLIMIT */
+ nla_total_size(1) /* LWTUNNEL_IP6_TC */
- + nla_total_size(2); /* LWTUNNEL_IP6_FLAGS */
+ + nla_total_size(2) /* LWTUNNEL_IP6_FLAGS */
+ + ip_tun_opts_nlsize(lwt_tun_info(lwtstate));
+ /* LWTUNNEL_IP6_OPTS */
}
static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 9bcca08efec9..32e20b758b68 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1483,10 +1483,10 @@ static int __init ip_auto_config(void)
* missing values.
*/
if (ic_myaddr == NONE ||
-#ifdef CONFIG_ROOT_NFS
+#if defined(CONFIG_ROOT_NFS) || defined(CONFIG_CIFS_ROOT)
(root_server_addr == NONE &&
ic_servaddr == NONE &&
- ROOT_DEV == Root_NFS) ||
+ (ROOT_DEV == Root_NFS || ROOT_DEV == Root_CIFS)) ||
#endif
ic_first_dev->next) {
#ifdef IPCONFIG_DYNAMIC
@@ -1513,6 +1513,12 @@ static int __init ip_auto_config(void)
goto try_try_again;
}
#endif
+#ifdef CONFIG_CIFS_ROOT
+ if (ROOT_DEV == Root_CIFS) {
+ pr_err("IP-Config: Retrying forever (CIFS root)...\n");
+ goto try_try_again;
+ }
+#endif
if (--retries) {
pr_err("IP-Config: Reopening network devices...\n");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 716d5472c022..440294bdb752 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -278,9 +278,10 @@ static void __net_exit ipmr_rules_exit(struct net *net)
rtnl_unlock();
}
-static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack);
}
static unsigned int ipmr_rules_seq_read(struct net *net)
@@ -336,7 +337,8 @@ static void __net_exit ipmr_rules_exit(struct net *net)
rtnl_unlock();
}
-static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
return 0;
}
@@ -3040,10 +3042,11 @@ static unsigned int ipmr_seq_read(struct net *net)
return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
}
-static int ipmr_dump(struct net *net, struct notifier_block *nb)
+static int ipmr_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
- ipmr_mr_table_iter, &mrt_lock);
+ ipmr_mr_table_iter, &mrt_lock, extack);
}
static const struct fib_notifier_ops ipmr_notifier_ops_template = {
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index ea48bd15a575..aa8738a91210 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -386,15 +386,17 @@ EXPORT_SYMBOL(mr_rtm_dumproute);
int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
int (*rules_dump)(struct net *net,
- struct notifier_block *nb),
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt),
- rwlock_t *mrt_lock)
+ rwlock_t *mrt_lock,
+ struct netlink_ext_ack *extack)
{
struct mr_table *mrt;
int err;
- err = rules_dump(net, nb);
+ err = rules_dump(net, nb, extack);
if (err)
return err;
@@ -409,17 +411,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
if (!v->dev)
continue;
- mr_call_vif_notifier(nb, net, family,
- FIB_EVENT_VIF_ADD,
- v, vifi, mrt->id);
+ err = mr_call_vif_notifier(nb, family,
+ FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id, extack);
+ if (err)
+ break;
}
read_unlock(mrt_lock);
+ if (err)
+ return err;
+
/* Notify on table MFC entries */
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- mr_call_mfc_notifier(nb, net, family,
- FIB_EVENT_ENTRY_ADD,
- mfc, mrt->id);
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ err = mr_call_mfc_notifier(nb, family,
+ FIB_EVENT_ENTRY_ADD,
+ mfc, mrt->id, extack);
+ if (err)
+ return err;
+ }
}
return 0;
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index 36a28d46149c..c94445b44d8c 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -31,16 +31,8 @@ extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol,
if (icmph == NULL)
return 1;
- switch (icmph->type) {
- case ICMP_DEST_UNREACH:
- case ICMP_SOURCE_QUENCH:
- case ICMP_REDIRECT:
- case ICMP_TIME_EXCEEDED:
- case ICMP_PARAMETERPROB:
- break;
- default:
+ if (!icmp_is_err(icmph->type))
return 1;
- }
inside_iph = skb_header_pointer(skb, outside_hdrlen +
sizeof(struct icmphdr),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 621f83434b24..dcc4fa10138d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1894,10 +1894,7 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
if (!icmph)
goto out;
- if (icmph->type != ICMP_DEST_UNREACH &&
- icmph->type != ICMP_REDIRECT &&
- icmph->type != ICMP_TIME_EXCEEDED &&
- icmph->type != ICMP_PARAMETERPROB)
+ if (!icmp_is_err(icmph->type))
goto out;
inner_iph = skb_header_pointer(skb,
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 535b69326f66..345b2b0ff618 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -62,10 +62,10 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
* Since subsequent timestamps use the normal tcp_time_stamp value, we
* must make sure that the resulting initial timestamp is <= tcp_time_stamp.
*/
-u64 cookie_init_timestamp(struct request_sock *req)
+u64 cookie_init_timestamp(struct request_sock *req, u64 now)
{
struct inet_request_sock *ireq;
- u32 ts, ts_now = tcp_time_stamp_raw();
+ u32 ts, ts_now = tcp_ns_to_ts(now);
u32 options = 0;
ireq = inet_rsk(req);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d8876f0e9672..9b48aec29aca 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1741,8 +1741,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
struct tcp_zerocopy_receive *zc)
{
unsigned long address = (unsigned long)zc->address;
+ u32 length = 0, seq, offset, zap_len;
const skb_frag_t *frags = NULL;
- u32 length = 0, seq, offset;
struct vm_area_struct *vma;
struct sk_buff *skb = NULL;
struct tcp_sock *tp;
@@ -1769,12 +1769,12 @@ static int tcp_zerocopy_receive(struct sock *sk,
seq = tp->copied_seq;
inq = tcp_inq(sk);
zc->length = min_t(u32, zc->length, inq);
- zc->length &= ~(PAGE_SIZE - 1);
- if (zc->length) {
- zap_page_range(vma, address, zc->length);
+ zap_len = zc->length & ~(PAGE_SIZE - 1);
+ if (zap_len) {
+ zap_page_range(vma, address, zap_len);
zc->recv_skip_hint = 0;
} else {
- zc->recv_skip_hint = inq;
+ zc->recv_skip_hint = zc->length;
}
ret = 0;
while (length + PAGE_SIZE <= zc->length) {
@@ -1958,8 +1958,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
struct sk_buff *skb, *last;
u32 urg_hole = 0;
struct scm_timestamping_internal tss;
- bool has_tss = false;
- bool has_cmsg;
+ int cmsg_flags;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
@@ -1974,7 +1973,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (sk->sk_state == TCP_LISTEN)
goto out;
- has_cmsg = tp->recvmsg_inq;
+ cmsg_flags = tp->recvmsg_inq ? 1 : 0;
timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */
@@ -2047,7 +2046,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
/* Well, if we have backlog, try to process it now yet. */
- if (copied >= target && !sk->sk_backlog.tail)
+ if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
break;
if (copied) {
@@ -2157,8 +2156,7 @@ skip_copy:
if (TCP_SKB_CB(skb)->has_rxtstamp) {
tcp_update_recv_tstamps(skb, &tss);
- has_tss = true;
- has_cmsg = true;
+ cmsg_flags |= 2;
}
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
@@ -2183,10 +2181,10 @@ found_fin_ok:
release_sock(sk);
- if (has_cmsg) {
- if (has_tss)
+ if (cmsg_flags) {
+ if (cmsg_flags & 2)
tcp_recv_timestamp(msg, sk, &tss);
- if (tp->recvmsg_inq) {
+ if (cmsg_flags & 1) {
inq = tcp_inq_hint(sk);
put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
}
@@ -2666,6 +2664,7 @@ int tcp_disconnect(struct sock *sk, int flags)
/* Clean up fastopen related fields */
tcp_free_fastopen_req(tp);
inet->defer_connect = 0;
+ tp->fastopen_client_fail = 0;
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
@@ -3224,8 +3223,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
* tcpi_unacked -> Number of children ready for accept()
* tcpi_sacked -> max backlog
*/
- info->tcpi_unacked = sk->sk_ack_backlog;
- info->tcpi_sacked = sk->sk_max_ack_backlog;
+ info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog);
+ info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog);
return;
}
@@ -3305,6 +3304,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_reord_seen = tp->reord_seen;
info->tcpi_rcv_ooopack = tp->rcv_ooopack;
info->tcpi_snd_wnd = tp->snd_wnd;
+ info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 549506162dde..0d08f9e2d8d0 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -21,8 +21,8 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
struct tcp_info *info = _info;
if (inet_sk_state_load(sk) == TCP_LISTEN) {
- r->idiag_rqueue = sk->sk_ack_backlog;
- r->idiag_wqueue = sk->sk_max_ack_backlog;
+ r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
+ r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
} else if (sk->sk_type == SOCK_STREAM) {
const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index a915ade0c818..19ad9586c720 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -422,7 +422,10 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
cookie->len = -1;
return true;
}
- return cookie->len > 0;
+ if (cookie->len > 0)
+ return true;
+ tcp_sk(sk)->fastopen_client_fail = TFO_COOKIE_UNAVAILABLE;
+ return false;
}
/* This function checks if we want to defer sending SYN until the first
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a2e52ad7cdab..88b987ca9ebb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5814,6 +5814,10 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
if (data) { /* Retransmit unacked data in SYN */
+ if (tp->total_retrans)
+ tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED;
+ else
+ tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
skb_rbtree_walk_from(data) {
if (__tcp_retransmit_skb(sk, data, 1))
break;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 67b2dc7a1727..92282f98dc82 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -121,11 +121,9 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == AF_INET6) {
if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
- (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
- (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
+ ipv6_addr_v4mapped_loopback(&tw->tw_v6_daddr) ||
ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
- (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
- (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
+ ipv6_addr_v4mapped_loopback(&tw->tw_v6_rcv_saddr))
loopback = true;
} else
#endif
@@ -2453,7 +2451,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
state = inet_sk_state_load(sk);
if (state == TCP_LISTEN)
- rx_queue = sk->sk_ack_backlog;
+ rx_queue = READ_ONCE(sk->sk_ack_backlog);
else
/* Because we don't lock the socket,
* we might find a transient negative value.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0488607c5cd3..be6d22b8190f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3290,7 +3290,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
now = tcp_clock_ns();
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
- skb->skb_mstamp_ns = cookie_init_timestamp(req);
+ skb->skb_mstamp_ns = cookie_init_timestamp(req, now);
else
#endif
{
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 34ccef18b40e..98d82305d6de 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5552,14 +5552,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
if (!nla)
goto nla_put_failure;
-
- if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
- goto nla_put_failure;
-
read_lock_bh(&idev->lock);
memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
read_unlock_bh(&idev->lock);
+ if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index 05f82baaa99e..f87ae33e1d01 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -7,12 +7,12 @@
#include <net/netns/ipv6.h>
#include <net/ip6_fib.h>
-int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+int call_fib6_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
info->family = AF_INET6;
- return call_fib_notifier(nb, net, event_type, info);
+ return call_fib_notifier(nb, event_type, info);
}
int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
@@ -27,15 +27,16 @@ static unsigned int fib6_seq_read(struct net *net)
return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
}
-static int fib6_dump(struct net *net, struct notifier_block *nb)
+static int fib6_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
int err;
- err = fib6_rules_dump(net, nb);
+ err = fib6_rules_dump(net, nb, extack);
if (err)
return err;
- return fib6_tables_dump(net, nb);
+ return fib6_tables_dump(net, nb, extack);
}
static const struct fib_notifier_ops fib6_notifier_ops_template = {
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index f9e8fe3ff0c5..fafe556d21e0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -47,9 +47,10 @@ bool fib6_rule_default(const struct fib_rule *rule)
}
EXPORT_SYMBOL_GPL(fib6_rule_default);
-int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+int fib6_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- return fib_rules_dump(net, nb, AF_INET6);
+ return fib_rules_dump(net, nb, AF_INET6, extack);
}
unsigned int fib6_rules_seq_read(struct net *net)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 62c997201970..ef408a5090a2 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -516,13 +516,29 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
mip6_addr_swap(skb);
+ sk = icmpv6_xmit_lock(net);
+ if (!sk)
+ goto out_bh_enable;
+
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.daddr = hdr->saddr;
if (force_saddr)
saddr = force_saddr;
- if (saddr)
+ if (saddr) {
fl6.saddr = *saddr;
+ } else {
+ /* select a more meaningful saddr from input if */
+ struct net_device *in_netdev;
+
+ in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
+ if (in_netdev) {
+ ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
+ inet6_sk(sk)->srcprefs,
+ &fl6.saddr);
+ dev_put(in_netdev);
+ }
+ }
fl6.flowi6_mark = mark;
fl6.flowi6_oif = iif;
fl6.fl6_icmp_type = type;
@@ -531,10 +547,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
- sk = icmpv6_xmit_lock(net);
- if (!sk)
- goto out_bh_enable;
-
sk->sk_mark = mark;
np = inet6_sk(sk);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6e2af411cd9c..f66bc2af4e9d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -357,15 +357,17 @@ unsigned int fib6_tables_seq_read(struct net *net)
return fib_seq;
}
-static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+static int call_fib6_entry_notifier(struct notifier_block *nb,
enum fib_event_type event_type,
- struct fib6_info *rt)
+ struct fib6_info *rt,
+ struct netlink_ext_ack *extack)
{
struct fib6_entry_notifier_info info = {
+ .info.extack = extack,
.rt = rt,
};
- return call_fib6_notifier(nb, net, event_type, &info.info);
+ return call_fib6_notifier(nb, event_type, &info.info);
}
int call_fib6_entry_notifiers(struct net *net,
@@ -401,40 +403,51 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
struct fib6_dump_arg {
struct net *net;
struct notifier_block *nb;
+ struct netlink_ext_ack *extack;
};
-static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
+static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{
if (rt == arg->net->ipv6.fib6_null_entry)
- return;
- call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+ return 0;
+ return call_fib6_entry_notifier(arg->nb, FIB_EVENT_ENTRY_ADD,
+ rt, arg->extack);
}
static int fib6_node_dump(struct fib6_walker *w)
{
struct fib6_info *rt;
+ int err = 0;
- for_each_fib6_walker_rt(w)
- fib6_rt_dump(rt, w->args);
+ for_each_fib6_walker_rt(w) {
+ err = fib6_rt_dump(rt, w->args);
+ if (err)
+ break;
+ }
w->leaf = NULL;
- return 0;
+ return err;
}
-static void fib6_table_dump(struct net *net, struct fib6_table *tb,
- struct fib6_walker *w)
+static int fib6_table_dump(struct net *net, struct fib6_table *tb,
+ struct fib6_walker *w)
{
+ int err;
+
w->root = &tb->tb6_root;
spin_lock_bh(&tb->tb6_lock);
- fib6_walk(net, w);
+ err = fib6_walk(net, w);
spin_unlock_bh(&tb->tb6_lock);
+ return err;
}
/* Called with rcu_read_lock() */
-int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+int fib6_tables_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
struct fib6_dump_arg arg;
struct fib6_walker *w;
unsigned int h;
+ int err = 0;
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (!w)
@@ -443,19 +456,24 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb)
w->func = fib6_node_dump;
arg.net = net;
arg.nb = nb;
+ arg.extack = extack;
w->args = &arg;
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv6.fib_table_hash[h];
struct fib6_table *tb;
- hlist_for_each_entry_rcu(tb, head, tb6_hlist)
- fib6_table_dump(net, tb, w);
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+ err = fib6_table_dump(net, tb, w);
+ if (err < 0)
+ goto out;
+ }
}
+out:
kfree(w);
- return 0;
+ return err;
}
static int fib6_dump_node(struct fib6_walker *w)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 3d71c7d6102c..ef7f707d9ae3 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -325,7 +325,8 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
list_add_tail(&skb->list, &sublist);
}
/* dispatch final sublist */
- ip6_sublist_rcv(&sublist, curr_dev, curr_net);
+ if (!list_empty(&sublist))
+ ip6_sublist_rcv(&sublist, curr_dev, curr_net);
}
INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 857a89ad4d6c..bfa49ff70531 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -265,9 +265,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
rtnl_unlock();
}
-static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}
static unsigned int ip6mr_rules_seq_read(struct net *net)
@@ -324,7 +325,8 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
rtnl_unlock();
}
-static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
return 0;
}
@@ -1256,10 +1258,11 @@ static unsigned int ip6mr_seq_read(struct net *net)
return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
}
-static int ip6mr_dump(struct net *net, struct notifier_block *nb)
+static int ip6mr_dump(struct net *net, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
- ip6mr_mr_table_iter, &mrt_lock);
+ ip6mr_mr_table_iter, &mrt_lock, extack);
}
static struct notifier_block ip6_mr_notifier = {
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
index 34d51cd426b0..6bac68fb27a3 100644
--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -150,4 +150,4 @@ EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v6);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
-MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support");
+MODULE_DESCRIPTION("Netfilter IPv6 transparent proxy support");
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e60bf8e7dd1a..edcb52543518 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1479,11 +1479,11 @@ static u32 rt6_exception_hash(const struct in6_addr *dst,
u32 val;
net_get_random_once(&seed, sizeof(seed));
- val = jhash(dst, sizeof(*dst), seed);
+ val = jhash2((const u32 *)dst, sizeof(*dst)/sizeof(u32), seed);
#ifdef CONFIG_IPV6_SUBTREES
if (src)
- val = jhash(src, sizeof(*src), val);
+ val = jhash2((const u32 *)src, sizeof(*src)/sizeof(u32), val);
#endif
return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
}
@@ -2295,10 +2295,7 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
if (!icmph)
goto out;
- if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
- icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
- icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
- icmph->icmp6_type != ICMPV6_PARAMPROB)
+ if (!icmpv6_is_err(icmph->icmp6_type))
goto out;
inner_iph = skb_header_pointer(skb,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4804b6dc5e65..81f51335e326 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1891,7 +1891,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
state = inet_sk_state_load(sp);
if (state == TCP_LISTEN)
- rx_queue = sp->sk_ack_backlog;
+ rx_queue = READ_ONCE(sp->sk_ack_backlog);
else
/* Because we don't lock the socket,
* we might find a transient negative value.
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c74f44dfaa22..2922d4150d88 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -705,7 +705,7 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
/* put original socket back into a clean listen state. */
sk->sk_state = TCP_LISTEN;
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
dprintk("%s: ok success on %02X, client on %02X\n", __func__,
llc_sk(sk)->addr.sllc_sap, newllc->daddr.lsap);
frees:
@@ -780,7 +780,7 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
}
/* Well, if we have backlog, try to process it now yet. */
- if (copied >= target && !sk->sk_backlog.tail)
+ if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
break;
if (copied) {
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index b11883d26875..33da6f738c99 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -485,7 +485,14 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
params.ssn = sta->tid_seq[tid] >> 4;
ret = drv_ampdu_action(local, sdata, &params);
- if (ret) {
+ if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) {
+ /*
+ * We didn't send the request yet, so don't need to check
+ * here if we already got a response, just mark as driver
+ * ready immediately.
+ */
+ set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state);
+ } else if (ret) {
ht_dbg(sdata,
"BA request denied - HW unavailable for %pM tid %d\n",
sta->sta.addr, tid);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 70739e746c13..4fb7f1f12109 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3428,7 +3428,7 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
spin_lock_irqsave(&local->ack_status_lock, spin_flags);
id = idr_alloc(&local->ack_status_frames, ack_skb,
- 1, 0x10000, GFP_ATOMIC);
+ 1, 0x40, GFP_ATOMIC);
spin_unlock_irqrestore(&local->ack_status_lock, spin_flags);
if (id < 0) {
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 0a6ff01c68a9..d40744903fa9 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -538,7 +538,6 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct cfg80211_bss *cbss;
- int err, changed = 0;
sdata_assert_lock(sdata);
@@ -560,13 +559,7 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
ifibss->chandef = sdata->csa_chandef;
/* generate the beacon */
- err = ieee80211_ibss_csa_beacon(sdata, NULL);
- if (err < 0)
- return err;
-
- changed |= err;
-
- return changed;
+ return ieee80211_ibss_csa_beacon(sdata, NULL);
}
void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 54dd8849d1cc..5fa13176036f 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3186,15 +3186,14 @@ static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata,
static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
struct cfg80211_bss *cbss,
- struct ieee80211_mgmt *mgmt, size_t len)
+ struct ieee80211_mgmt *mgmt, size_t len,
+ struct ieee802_11_elems *elems)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
struct sta_info *sta;
- u8 *pos;
u16 capab_info, aid;
- struct ieee802_11_elems elems;
struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
const struct cfg80211_bss_ies *bss_ies = NULL;
struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
@@ -3222,19 +3221,15 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
ifmgd->broken_ap = true;
}
- pos = mgmt->u.assoc_resp.variable;
- ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
- mgmt->bssid, assoc_data->bss->bssid);
-
- if (!elems.supp_rates) {
+ if (!elems->supp_rates) {
sdata_info(sdata, "no SuppRates element in AssocResp\n");
return false;
}
ifmgd->aid = aid;
ifmgd->tdls_chan_switch_prohibited =
- elems.ext_capab && elems.ext_capab_len >= 5 &&
- (elems.ext_capab[4] & WLAN_EXT_CAPA5_TDLS_CH_SW_PROHIBITED);
+ elems->ext_capab && elems->ext_capab_len >= 5 &&
+ (elems->ext_capab[4] & WLAN_EXT_CAPA5_TDLS_CH_SW_PROHIBITED);
/*
* Some APs are erroneously not including some information in their
@@ -3243,11 +3238,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
* 2G/3G/4G wifi routers, reported models include the "Onda PN51T",
* "Vodafone PocketWiFi 2", "ZTE MF60" and a similar T-Mobile device.
*/
- if ((assoc_data->wmm && !elems.wmm_param) ||
+ if ((assoc_data->wmm && !elems->wmm_param) ||
(!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
- (!elems.ht_cap_elem || !elems.ht_operation)) ||
+ (!elems->ht_cap_elem || !elems->ht_operation)) ||
(!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
- (!elems.vht_cap_elem || !elems.vht_operation))) {
+ (!elems->vht_cap_elem || !elems->vht_operation))) {
const struct cfg80211_bss_ies *ies;
struct ieee802_11_elems bss_elems;
@@ -3265,8 +3260,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
mgmt->bssid,
assoc_data->bss->bssid);
if (assoc_data->wmm &&
- !elems.wmm_param && bss_elems.wmm_param) {
- elems.wmm_param = bss_elems.wmm_param;
+ !elems->wmm_param && bss_elems.wmm_param) {
+ elems->wmm_param = bss_elems.wmm_param;
sdata_info(sdata,
"AP bug: WMM param missing from AssocResp\n");
}
@@ -3275,27 +3270,27 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
* Also check if we requested HT/VHT, otherwise the AP doesn't
* have to include the IEs in the (re)association response.
*/
- if (!elems.ht_cap_elem && bss_elems.ht_cap_elem &&
+ if (!elems->ht_cap_elem && bss_elems.ht_cap_elem &&
!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
- elems.ht_cap_elem = bss_elems.ht_cap_elem;
+ elems->ht_cap_elem = bss_elems.ht_cap_elem;
sdata_info(sdata,
"AP bug: HT capability missing from AssocResp\n");
}
- if (!elems.ht_operation && bss_elems.ht_operation &&
+ if (!elems->ht_operation && bss_elems.ht_operation &&
!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
- elems.ht_operation = bss_elems.ht_operation;
+ elems->ht_operation = bss_elems.ht_operation;
sdata_info(sdata,
"AP bug: HT operation missing from AssocResp\n");
}
- if (!elems.vht_cap_elem && bss_elems.vht_cap_elem &&
+ if (!elems->vht_cap_elem && bss_elems.vht_cap_elem &&
!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
- elems.vht_cap_elem = bss_elems.vht_cap_elem;
+ elems->vht_cap_elem = bss_elems.vht_cap_elem;
sdata_info(sdata,
"AP bug: VHT capa missing from AssocResp\n");
}
- if (!elems.vht_operation && bss_elems.vht_operation &&
+ if (!elems->vht_operation && bss_elems.vht_operation &&
!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
- elems.vht_operation = bss_elems.vht_operation;
+ elems->vht_operation = bss_elems.vht_operation;
sdata_info(sdata,
"AP bug: VHT operation missing from AssocResp\n");
}
@@ -3306,7 +3301,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
* they should be present here. This is just a safety net.
*/
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
- (!elems.wmm_param || !elems.ht_cap_elem || !elems.ht_operation)) {
+ (!elems->wmm_param || !elems->ht_cap_elem || !elems->ht_operation)) {
sdata_info(sdata,
"HT AP is missing WMM params or HT capability/operation\n");
ret = false;
@@ -3314,7 +3309,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
}
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
- (!elems.vht_cap_elem || !elems.vht_operation)) {
+ (!elems->vht_cap_elem || !elems->vht_operation)) {
sdata_info(sdata,
"VHT AP is missing VHT capability/operation\n");
ret = false;
@@ -3341,7 +3336,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
}
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
- (!elems.he_cap || !elems.he_operation)) {
+ (!elems->he_cap || !elems->he_operation)) {
mutex_unlock(&sdata->local->sta_mtx);
sdata_info(sdata,
"HE AP is missing HE capability/operation\n");
@@ -3350,23 +3345,23 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
}
/* Set up internal HT/VHT capabilities */
- if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
+ if (elems->ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
- elems.ht_cap_elem, sta);
+ elems->ht_cap_elem, sta);
- if (elems.vht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
+ if (elems->vht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
- elems.vht_cap_elem, sta);
+ elems->vht_cap_elem, sta);
- if (elems.he_operation && !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
- elems.he_cap) {
+ if (elems->he_operation && !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+ elems->he_cap) {
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
- elems.he_cap,
- elems.he_cap_len,
+ elems->he_cap,
+ elems->he_cap_len,
sta);
bss_conf->he_support = sta->sta.he_cap.has_he;
- changed |= ieee80211_recalc_twt_req(sdata, sta, &elems);
+ changed |= ieee80211_recalc_twt_req(sdata, sta, elems);
} else {
bss_conf->he_support = false;
bss_conf->twt_requester = false;
@@ -3374,14 +3369,14 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
if (bss_conf->he_support) {
bss_conf->bss_color =
- le32_get_bits(elems.he_operation->he_oper_params,
+ le32_get_bits(elems->he_operation->he_oper_params,
IEEE80211_HE_OPERATION_BSS_COLOR_MASK);
bss_conf->htc_trig_based_pkt_ext =
- le32_get_bits(elems.he_operation->he_oper_params,
+ le32_get_bits(elems->he_operation->he_oper_params,
IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK);
bss_conf->frame_time_rts_th =
- le32_get_bits(elems.he_operation->he_oper_params,
+ le32_get_bits(elems->he_operation->he_oper_params,
IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK);
bss_conf->multi_sta_back_32bit =
@@ -3392,12 +3387,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
IEEE80211_HE_MAC_CAP2_ACK_EN;
- bss_conf->uora_exists = !!elems.uora_element;
- if (elems.uora_element)
- bss_conf->uora_ocw_range = elems.uora_element[0];
+ bss_conf->uora_exists = !!elems->uora_element;
+ if (elems->uora_element)
+ bss_conf->uora_ocw_range = elems->uora_element[0];
- ieee80211_he_op_ie_to_bss_conf(&sdata->vif, elems.he_operation);
- ieee80211_he_spr_ie_to_bss_conf(&sdata->vif, elems.he_spr);
+ ieee80211_he_op_ie_to_bss_conf(&sdata->vif, elems->he_operation);
+ ieee80211_he_spr_ie_to_bss_conf(&sdata->vif, elems->he_spr);
/* TODO: OPEN: what happens if BSS color disable is set? */
}
@@ -3421,11 +3416,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
* NSS calculation (that would be done in rate_control_rate_init())
* and use the # of streams from that element.
*/
- if (elems.opmode_notif &&
- !(*elems.opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF)) {
+ if (elems->opmode_notif &&
+ !(*elems->opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF)) {
u8 nss;
- nss = *elems.opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK;
+ nss = *elems->opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK;
nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT;
nss += 1;
sta->sta.rx_nss = nss;
@@ -3440,7 +3435,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
sta->sta.mfp = false;
}
- sta->sta.wme = elems.wmm_param && local->hw.queues >= IEEE80211_NUM_ACS;
+ sta->sta.wme = elems->wmm_param && local->hw.queues >= IEEE80211_NUM_ACS;
err = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
if (!err && !(ifmgd->flags & IEEE80211_STA_CONTROL_PORT))
@@ -3468,9 +3463,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
if (ifmgd->flags & IEEE80211_STA_DISABLE_WMM) {
ieee80211_set_wmm_default(sdata, false, false);
- } else if (!ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
- elems.wmm_param_len,
- elems.mu_edca_param_set)) {
+ } else if (!ieee80211_sta_wmm_params(local, sdata, elems->wmm_param,
+ elems->wmm_param_len,
+ elems->mu_edca_param_set)) {
/* still enable QoS since we might have HT/VHT */
ieee80211_set_wmm_default(sdata, false, true);
/* set the disable-WMM flag in this case to disable
@@ -3484,11 +3479,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
}
changed |= BSS_CHANGED_QOS;
- if (elems.max_idle_period_ie) {
+ if (elems->max_idle_period_ie) {
bss_conf->max_idle_period =
- le16_to_cpu(elems.max_idle_period_ie->max_idle_period);
+ le16_to_cpu(elems->max_idle_period_ie->max_idle_period);
bss_conf->protected_keep_alive =
- !!(elems.max_idle_period_ie->idle_options &
+ !!(elems->max_idle_period_ie->idle_options &
WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE);
changed |= BSS_CHANGED_KEEP_ALIVE;
} else {
@@ -3598,7 +3593,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
event.u.mlme.reason = status_code;
drv_event_callback(sdata->local, sdata, &event);
} else {
- if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) {
+ if (!ieee80211_assoc_success(sdata, bss, mgmt, len, &elems)) {
/* oops -- internal error -- send timeout for now */
ieee80211_destroy_assoc_data(sdata, false, false);
cfg80211_assoc_timeout(sdata->dev, bss);
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index ee86c3333999..86bc469a28bc 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -70,7 +70,7 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix)
}
/* return current EMWA throughput */
-int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma)
+int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg)
{
int usecs;
@@ -79,13 +79,13 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma)
usecs = 1000000;
/* reset thr. below 10% success */
- if (mr->stats.prob_ewma < MINSTREL_FRAC(10, 100))
+ if (mr->stats.prob_avg < MINSTREL_FRAC(10, 100))
return 0;
- if (prob_ewma > MINSTREL_FRAC(90, 100))
+ if (prob_avg > MINSTREL_FRAC(90, 100))
return MINSTREL_TRUNC(100000 * (MINSTREL_FRAC(90, 100) / usecs));
else
- return MINSTREL_TRUNC(100000 * (prob_ewma / usecs));
+ return MINSTREL_TRUNC(100000 * (prob_avg / usecs));
}
/* find & sort topmost throughput rates */
@@ -98,8 +98,8 @@ minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list)
for (j = MAX_THR_RATES; j > 0; --j) {
tmp_mrs = &mi->r[tp_list[j - 1]].stats;
- if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) <=
- minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))
+ if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_avg) <=
+ minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_avg))
break;
}
@@ -157,20 +157,24 @@ minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
* Recalculate statistics and counters of a given rate
*/
void
-minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs)
+minstrel_calc_rate_stats(struct minstrel_priv *mp,
+ struct minstrel_rate_stats *mrs)
{
unsigned int cur_prob;
if (unlikely(mrs->attempts > 0)) {
mrs->sample_skipped = 0;
cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts);
- if (unlikely(!mrs->att_hist)) {
- mrs->prob_ewma = cur_prob;
+ if (mp->new_avg) {
+ minstrel_filter_avg_add(&mrs->prob_avg,
+ &mrs->prob_avg_1, cur_prob);
+ } else if (unlikely(!mrs->att_hist)) {
+ mrs->prob_avg = cur_prob;
} else {
/*update exponential weighted moving avarage */
- mrs->prob_ewma = minstrel_ewma(mrs->prob_ewma,
- cur_prob,
- EWMA_LEVEL);
+ mrs->prob_avg = minstrel_ewma(mrs->prob_avg,
+ cur_prob,
+ EWMA_LEVEL);
}
mrs->att_hist += mrs->attempts;
mrs->succ_hist += mrs->success;
@@ -200,12 +204,12 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
struct minstrel_rate_stats *tmp_mrs = &mi->r[tmp_prob_rate].stats;
/* Update statistics of success probability per rate */
- minstrel_calc_rate_stats(mrs);
+ minstrel_calc_rate_stats(mp, mrs);
/* Sample less often below the 10% chance of success.
* Sample less often above the 95% chance of success. */
- if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) ||
- mrs->prob_ewma < MINSTREL_FRAC(10, 100)) {
+ if (mrs->prob_avg > MINSTREL_FRAC(95, 100) ||
+ mrs->prob_avg < MINSTREL_FRAC(10, 100)) {
mr->adjusted_retry_count = mrs->retry_count >> 1;
if (mr->adjusted_retry_count > 2)
mr->adjusted_retry_count = 2;
@@ -225,14 +229,14 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
* choose the maximum throughput rate as max_prob_rate
* (2) if all success probabilities < 95%, the rate with
* highest success probability is chosen as max_prob_rate */
- if (mrs->prob_ewma >= MINSTREL_FRAC(95, 100)) {
- tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_ewma);
+ if (mrs->prob_avg >= MINSTREL_FRAC(95, 100)) {
+ tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_avg);
tmp_prob_tp = minstrel_get_tp_avg(&mi->r[tmp_prob_rate],
- tmp_mrs->prob_ewma);
+ tmp_mrs->prob_avg);
if (tmp_cur_tp >= tmp_prob_tp)
tmp_prob_rate = i;
} else {
- if (mrs->prob_ewma >= tmp_mrs->prob_ewma)
+ if (mrs->prob_avg >= tmp_mrs->prob_avg)
tmp_prob_rate = i;
}
}
@@ -290,7 +294,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
mi->sample_deferred--;
if (time_after(jiffies, mi->last_stats_update +
- (mp->update_interval * HZ) / 1000))
+ mp->update_interval / (mp->new_avg ? 2 : 1)))
minstrel_update_stats(mp, mi);
}
@@ -422,7 +426,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
* has a probability of >95%, we shouldn't be attempting
* to use it, as this only wastes precious airtime */
if (!mrr_capable &&
- (mi->r[ndx].stats.prob_ewma > MINSTREL_FRAC(95, 100)))
+ (mi->r[ndx].stats.prob_avg > MINSTREL_FRAC(95, 100)))
return;
mi->prev_sample = true;
@@ -573,7 +577,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta)
* computing cur_tp
*/
tmp_mrs = &mi->r[idx].stats;
- tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10;
+ tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_avg) * 10;
tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
return tmp_cur_tp;
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 51d8b2c846e7..dbb43bcd3c45 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -19,6 +19,21 @@
#define MAX_THR_RATES 4
/*
+ * Coefficients for moving average with noise filter (period=16),
+ * scaled by 10 bits
+ *
+ * a1 = exp(-pi * sqrt(2) / period)
+ * coeff2 = 2 * a1 * cos(sqrt(2) * 2 * pi / period)
+ * coeff3 = -sqr(a1)
+ * coeff1 = 1 - coeff2 - coeff3
+ */
+#define MINSTREL_AVG_COEFF1 (MINSTREL_FRAC(1, 1) - \
+ MINSTREL_AVG_COEFF2 - \
+ MINSTREL_AVG_COEFF3)
+#define MINSTREL_AVG_COEFF2 0x00001499
+#define MINSTREL_AVG_COEFF3 -0x0000092e
+
+/*
* Perform EWMA (Exponentially Weighted Moving Average) calculation
*/
static inline int
@@ -32,6 +47,37 @@ minstrel_ewma(int old, int new, int weight)
return old + incr;
}
+static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in)
+{
+ s32 out_1 = *prev_1;
+ s32 out_2 = *prev_2;
+ s32 val;
+
+ if (!in)
+ in += 1;
+
+ if (!out_1) {
+ val = out_1 = in;
+ goto out;
+ }
+
+ val = MINSTREL_AVG_COEFF1 * in;
+ val += MINSTREL_AVG_COEFF2 * out_1;
+ val += MINSTREL_AVG_COEFF3 * out_2;
+ val >>= MINSTREL_SCALE;
+
+ if (val > 1 << MINSTREL_SCALE)
+ val = 1 << MINSTREL_SCALE;
+ if (val < 0)
+ val = 1;
+
+out:
+ *prev_2 = out_1;
+ *prev_1 = val;
+
+ return val;
+}
+
struct minstrel_rate_stats {
/* current / last sampling period attempts/success counters */
u16 attempts, last_attempts;
@@ -40,8 +86,9 @@ struct minstrel_rate_stats {
/* total attempts/success counters */
u32 att_hist, succ_hist;
- /* prob_ewma - exponential weighted moving average of prob */
- u16 prob_ewma;
+ /* prob_avg - moving average of prob */
+ u16 prob_avg;
+ u16 prob_avg_1;
/* maximum retry counts */
u8 retry_count;
@@ -95,6 +142,7 @@ struct minstrel_sta_info {
struct minstrel_priv {
struct ieee80211_hw *hw;
bool has_mrr;
+ bool new_avg;
u32 sample_switch;
unsigned int cw_min;
unsigned int cw_max;
@@ -126,8 +174,9 @@ extern const struct rate_control_ops mac80211_minstrel;
void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
/* Recalculate success probabilities and counters for a given rate using EWMA */
-void minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs);
-int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma);
+void minstrel_calc_rate_stats(struct minstrel_priv *mp,
+ struct minstrel_rate_stats *mrs);
+int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg);
/* debugfs */
int minstrel_stats_open(struct inode *inode, struct file *file);
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index c8afd85b51a0..9b8e0daeb7bb 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -90,8 +90,8 @@ minstrel_stats_open(struct inode *inode, struct file *file)
p += sprintf(p, "%6u ", mr->perfect_tx_time);
tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
- tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
- eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+ tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg);
+ eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u"
" %3u %3u %-3u "
@@ -147,8 +147,8 @@ minstrel_stats_csv_open(struct inode *inode, struct file *file)
p += sprintf(p, "%u,",mr->perfect_tx_time);
tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
- tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
- eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+ tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg);
+ eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,%u,"
"%llu,%llu,%d,%d\n",
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 0ef2633349b5..694a31978a04 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -346,12 +346,12 @@ minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi)
*/
int
minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
- int prob_ewma)
+ int prob_avg)
{
unsigned int nsecs = 0;
/* do not account throughput if sucess prob is below 10% */
- if (prob_ewma < MINSTREL_FRAC(10, 100))
+ if (prob_avg < MINSTREL_FRAC(10, 100))
return 0;
if (group != MINSTREL_CCK_GROUP)
@@ -365,11 +365,11 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
* account for collision related packet error rate fluctuation
* (prob is scaled - see MINSTREL_FRAC above)
*/
- if (prob_ewma > MINSTREL_FRAC(90, 100))
+ if (prob_avg > MINSTREL_FRAC(90, 100))
return MINSTREL_TRUNC(100000 * ((MINSTREL_FRAC(90, 100) * 1000)
/ nsecs));
else
- return MINSTREL_TRUNC(100000 * ((prob_ewma * 1000) / nsecs));
+ return MINSTREL_TRUNC(100000 * ((prob_avg * 1000) / nsecs));
}
/*
@@ -389,13 +389,13 @@ minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index,
cur_group = index / MCS_GROUP_RATES;
cur_idx = index % MCS_GROUP_RATES;
- cur_prob = mi->groups[cur_group].rates[cur_idx].prob_ewma;
+ cur_prob = mi->groups[cur_group].rates[cur_idx].prob_avg;
cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, cur_prob);
do {
tmp_group = tp_list[j - 1] / MCS_GROUP_RATES;
tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES;
- tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx,
tmp_prob);
if (cur_tp_avg < tmp_tp_avg ||
@@ -432,7 +432,7 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
tmp_group = mi->max_prob_rate / MCS_GROUP_RATES;
tmp_idx = mi->max_prob_rate % MCS_GROUP_RATES;
- tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
/* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from
@@ -444,11 +444,11 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
- max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
+ max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_avg;
- if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) {
+ if (mrs->prob_avg > MINSTREL_FRAC(75, 100)) {
cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx,
- mrs->prob_ewma);
+ mrs->prob_avg);
if (cur_tp_avg > tmp_tp_avg)
mi->max_prob_rate = index;
@@ -458,9 +458,9 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
if (cur_tp_avg > max_gpr_tp_avg)
mg->max_group_prob_rate = index;
} else {
- if (mrs->prob_ewma > tmp_prob)
+ if (mrs->prob_avg > tmp_prob)
mi->max_prob_rate = index;
- if (mrs->prob_ewma > max_gpr_prob)
+ if (mrs->prob_avg > max_gpr_prob)
mg->max_group_prob_rate = index;
}
}
@@ -482,12 +482,12 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi,
tmp_group = tmp_cck_tp_rate[0] / MCS_GROUP_RATES;
tmp_idx = tmp_cck_tp_rate[0] % MCS_GROUP_RATES;
- tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
tmp_group = tmp_mcs_tp_rate[0] / MCS_GROUP_RATES;
tmp_idx = tmp_mcs_tp_rate[0] % MCS_GROUP_RATES;
- tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma;
+ tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) {
@@ -518,7 +518,7 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
continue;
tmp_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
- tmp_prob = mi->groups[group].rates[tmp_idx].prob_ewma;
+ tmp_prob = mi->groups[group].rates[tmp_idx].prob_avg;
if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) &&
(minstrel_mcs_groups[group].streams < tmp_max_streams)) {
@@ -623,7 +623,7 @@ minstrel_ht_rate_sample_switch(struct minstrel_priv *mp,
* If that fails, look again for a rate that is at least as fast
*/
mrs = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
- faster_rate = mrs->prob_ewma > MINSTREL_FRAC(75, 100);
+ faster_rate = mrs->prob_avg > MINSTREL_FRAC(75, 100);
minstrel_ht_find_probe_rates(mi, rates, &n_rates, faster_rate);
if (!n_rates && faster_rate)
minstrel_ht_find_probe_rates(mi, rates, &n_rates, false);
@@ -737,8 +737,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
mrs = &mg->rates[i];
mrs->retry_updated = false;
- minstrel_calc_rate_stats(mrs);
- cur_prob = mrs->prob_ewma;
+ minstrel_calc_rate_stats(mp, mrs);
+ cur_prob = mrs->prob_avg;
if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0)
continue;
@@ -773,6 +773,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
/* try to sample all available rates during each interval */
mi->sample_count *= 8;
+ if (mp->new_avg)
+ mi->sample_count /= 2;
if (sample)
minstrel_ht_rate_sample_switch(mp, mi);
@@ -889,6 +891,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
struct ieee80211_tx_rate *ar = info->status.rates;
struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL;
struct minstrel_priv *mp = priv;
+ u32 update_interval = mp->update_interval / 2;
bool last, update = false;
bool sample_status = false;
int i;
@@ -943,6 +946,10 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
switch (mi->sample_mode) {
case MINSTREL_SAMPLE_IDLE:
+ if (mp->new_avg &&
+ (mp->hw->max_rates > 1 ||
+ mi->total_packets_cur < SAMPLE_SWITCH_THR))
+ update_interval /= 2;
break;
case MINSTREL_SAMPLE_ACTIVE:
@@ -970,23 +977,20 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
*/
rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
if (rate->attempts > 30 &&
- MINSTREL_FRAC(rate->success, rate->attempts) <
- MINSTREL_FRAC(20, 100)) {
+ rate->success < rate->attempts / 4) {
minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true);
update = true;
}
rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]);
if (rate2->attempts > 30 &&
- MINSTREL_FRAC(rate2->success, rate2->attempts) <
- MINSTREL_FRAC(20, 100)) {
+ rate2->success < rate2->attempts / 4) {
minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false);
update = true;
}
}
- if (time_after(jiffies, mi->last_stats_update +
- (mp->update_interval / 2 * HZ) / 1000)) {
+ if (time_after(jiffies, mi->last_stats_update + update_interval)) {
update = true;
minstrel_ht_update_stats(mp, mi, true);
}
@@ -1008,7 +1012,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
unsigned int overhead = 0, overhead_rtscts = 0;
mrs = minstrel_get_ratestats(mi, index);
- if (mrs->prob_ewma < MINSTREL_FRAC(1, 10)) {
+ if (mrs->prob_avg < MINSTREL_FRAC(1, 10)) {
mrs->retry_count = 1;
mrs->retry_count_rtscts = 1;
return;
@@ -1065,7 +1069,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
if (!mrs->retry_updated)
minstrel_calc_retransmit(mp, mi, index);
- if (mrs->prob_ewma < MINSTREL_FRAC(20, 100) || !mrs->retry_count) {
+ if (mrs->prob_avg < MINSTREL_FRAC(20, 100) || !mrs->retry_count) {
ratetbl->rate[offset].count = 2;
ratetbl->rate[offset].count_rts = 2;
ratetbl->rate[offset].count_cts = 2;
@@ -1099,11 +1103,11 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
}
static inline int
-minstrel_ht_get_prob_ewma(struct minstrel_ht_sta *mi, int rate)
+minstrel_ht_get_prob_avg(struct minstrel_ht_sta *mi, int rate)
{
int group = rate / MCS_GROUP_RATES;
rate %= MCS_GROUP_RATES;
- return mi->groups[group].rates[rate].prob_ewma;
+ return mi->groups[group].rates[rate].prob_avg;
}
static int
@@ -1115,7 +1119,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
unsigned int duration;
/* Disable A-MSDU if max_prob_rate is bad */
- if (mi->groups[group].rates[rate].prob_ewma < MINSTREL_FRAC(50, 100))
+ if (mi->groups[group].rates[rate].prob_avg < MINSTREL_FRAC(50, 100))
return 1;
duration = g->duration[rate];
@@ -1138,7 +1142,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
* data packet size
*/
if (duration > MCS_DURATION(1, 0, 260) ||
- (minstrel_ht_get_prob_ewma(mi, mi->max_tp_rate[0]) <
+ (minstrel_ht_get_prob_avg(mi, mi->max_tp_rate[0]) <
MINSTREL_FRAC(75, 100)))
return 3200;
@@ -1243,7 +1247,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
* rate, to avoid wasting airtime.
*/
sample_dur = minstrel_get_duration(sample_idx);
- if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) ||
+ if (mrs->prob_avg > MINSTREL_FRAC(95, 100) ||
minstrel_get_duration(mi->max_prob_rate) * 3 < sample_dur)
return -1;
@@ -1666,7 +1670,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
mp->has_mrr = true;
mp->hw = hw;
- mp->update_interval = 100;
+ mp->update_interval = HZ / 10;
+ mp->new_avg = true;
#ifdef CONFIG_MAC80211_DEBUGFS
mp->fixed_rate_idx = (u32) -1;
@@ -1674,6 +1679,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
&mp->fixed_rate_idx);
debugfs_create_u32("sample_switch", S_IRUGO | S_IWUSR, debugfsdir,
&mp->sample_switch);
+ debugfs_create_bool("new_avg", S_IRUGO | S_IWUSR, debugfsdir,
+ &mp->new_avg);
#endif
minstrel_ht_init_cck_rates(mp);
@@ -1698,7 +1705,7 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
i = mi->max_tp_rate[0] / MCS_GROUP_RATES;
j = mi->max_tp_rate[0] % MCS_GROUP_RATES;
- prob = mi->groups[i].rates[j].prob_ewma;
+ prob = mi->groups[i].rates[j].prob_avg;
/* convert tp_avg from pkt per second in kbps */
tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10;
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index f938701e7ab7..53ea3c29debf 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -119,6 +119,6 @@ struct minstrel_ht_sta_priv {
void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
- int prob_ewma);
+ int prob_avg);
#endif
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 5a6e9f3edc04..bebb71917742 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -98,8 +98,8 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
p += sprintf(p, "%6u ", tx_time);
tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
- tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
- eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg);
+ eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u"
" %3u %3u %-3u "
@@ -243,8 +243,8 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p)
p += sprintf(p, "%u,", tx_time);
tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
- tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
- eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
+ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_avg);
+ eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,"
"%u,%llu,%llu,",
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1fa422782905..db38be1b75fa 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1617,7 +1617,7 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
static bool ieee80211_tx_frags(struct ieee80211_local *local,
struct ieee80211_vif *vif,
- struct ieee80211_sta *sta,
+ struct sta_info *sta,
struct sk_buff_head *skbs,
bool txpending)
{
@@ -1679,7 +1679,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
info->control.vif = vif;
- control.sta = sta;
+ control.sta = sta ? &sta->sta : NULL;
__skb_unlink(skb, skbs);
drv_tx(local, &control, skb);
@@ -1698,7 +1698,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
struct ieee80211_tx_info *info;
struct ieee80211_sub_if_data *sdata;
struct ieee80211_vif *vif;
- struct ieee80211_sta *pubsta;
struct sk_buff *skb;
bool result = true;
__le16 fc;
@@ -1713,11 +1712,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
if (sta && !sta->uploaded)
sta = NULL;
- if (sta)
- pubsta = &sta->sta;
- else
- pubsta = NULL;
-
switch (sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
@@ -1744,8 +1738,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
break;
}
- result = ieee80211_tx_frags(local, vif, pubsta, skbs,
- txpending);
+ result = ieee80211_tx_frags(local, vif, sta, skbs, txpending);
ieee80211_tpt_led_trig_tx(local, fc, led_len);
@@ -2424,6 +2417,33 @@ static int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata,
return 0;
}
+static int ieee80211_store_ack_skb(struct ieee80211_local *local,
+ struct sk_buff *skb,
+ u32 *info_flags)
+{
+ struct sk_buff *ack_skb = skb_clone_sk(skb);
+ u16 info_id = 0;
+
+ if (ack_skb) {
+ unsigned long flags;
+ int id;
+
+ spin_lock_irqsave(&local->ack_status_lock, flags);
+ id = idr_alloc(&local->ack_status_frames, ack_skb,
+ 1, 0x40, GFP_ATOMIC);
+ spin_unlock_irqrestore(&local->ack_status_lock, flags);
+
+ if (id >= 0) {
+ info_id = id;
+ *info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+ } else {
+ kfree_skb(ack_skb);
+ }
+ }
+
+ return info_id;
+}
+
/**
* ieee80211_build_hdr - build 802.11 header in the given frame
* @sdata: virtual interface to build the header for
@@ -2717,26 +2737,8 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
}
if (unlikely(!multicast && skb->sk &&
- skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) {
- struct sk_buff *ack_skb = skb_clone_sk(skb);
-
- if (ack_skb) {
- unsigned long flags;
- int id;
-
- spin_lock_irqsave(&local->ack_status_lock, flags);
- id = idr_alloc(&local->ack_status_frames, ack_skb,
- 1, 0x10000, GFP_ATOMIC);
- spin_unlock_irqrestore(&local->ack_status_lock, flags);
-
- if (id >= 0) {
- info_id = id;
- info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
- } else {
- kfree_skb(ack_skb);
- }
- }
- }
+ skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
+ info_id = ieee80211_store_ack_skb(local, skb, &info_flags);
/*
* If the skb is shared we need to obtain our own copy.
@@ -3529,7 +3531,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sub_if_data, u.ap);
__skb_queue_tail(&tx.skbs, skb);
- ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false);
+ ieee80211_tx_frags(local, &sdata->vif, sta, &tx.skbs, false);
return true;
}
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 5d5bdf450091..78f046ec506f 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -536,6 +536,26 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
}
EXPORT_SYMBOL(nf_hook_slow);
+void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
+ const struct nf_hook_entries *e)
+{
+ struct sk_buff *skb, *next;
+ struct list_head sublist;
+ int ret;
+
+ INIT_LIST_HEAD(&sublist);
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ skb_list_del_init(skb);
+ ret = nf_hook_slow(skb, state, e, 0);
+ if (ret == 1)
+ list_add_tail(&skb->list, &sublist);
+ }
+ /* Put passed packets back on main list */
+ list_splice(&sublist, head);
+}
+EXPORT_SYMBOL(nf_hook_slow_list);
+
/* This needs to be compiled in any case to avoid dependencies between the
* nfnetlink_queue code and nf_conntrack.
*/
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 063df74b4647..1abd6f0dc227 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -192,7 +192,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
#ifndef IP_SET_BITMAP_STORED_TIMEOUT
-static inline bool
+static bool
mtype_is_filled(const struct mtype_elem *x)
{
return true;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 11ff9d4a7006..abe8f77d7d23 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -55,7 +55,7 @@ struct bitmap_ip_adt_elem {
u16 id;
};
-static inline u32
+static u32
ip_to_id(const struct bitmap_ip *m, u32 ip)
{
return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts;
@@ -63,33 +63,33 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
/* Common functions */
-static inline int
+static int
bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e,
struct bitmap_ip *map, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize)
{
return !!test_bit(id, map->members);
}
-static inline int
+static int
bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
u32 flags, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
{
return !test_and_clear_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
size_t dsize)
{
@@ -97,7 +97,7 @@ bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
htonl(map->first_ip + id * map->hosts));
}
-static inline int
+static int
bitmap_ip_do_head(struct sk_buff *skb, const struct bitmap_ip *map)
{
return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
@@ -237,6 +237,18 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
return true;
}
+static u32
+range_to_mask(u32 from, u32 to, u8 *bits)
+{
+ u32 mask = 0xFFFFFFFE;
+
+ *bits = 32;
+ while (--(*bits) > 0 && mask && (to & mask) != from)
+ mask <<= 1;
+
+ return mask;
+}
+
static int
bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
u32 flags)
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1d4e63326e68..b618713297da 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -65,7 +65,7 @@ struct bitmap_ipmac_elem {
unsigned char filled;
} __aligned(__alignof__(u64));
-static inline u32
+static u32
ip_to_id(const struct bitmap_ipmac *m, u32 ip)
{
return ip - m->first_ip;
@@ -79,7 +79,7 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
/* Common functions */
-static inline int
+static int
bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
const struct bitmap_ipmac *map, size_t dsize)
{
@@ -94,7 +94,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
return -EAGAIN;
}
-static inline int
+static int
bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
{
const struct bitmap_ipmac_elem *elem;
@@ -106,13 +106,13 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
return elem->filled == MAC_FILLED;
}
-static inline int
+static int
bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
{
return elem->filled == MAC_FILLED;
}
-static inline int
+static int
bitmap_ipmac_add_timeout(unsigned long *timeout,
const struct bitmap_ipmac_adt_elem *e,
const struct ip_set_ext *ext, struct ip_set *set,
@@ -139,7 +139,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
return 0;
}
-static inline int
+static int
bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
struct bitmap_ipmac *map, u32 flags, size_t dsize)
{
@@ -177,14 +177,14 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
return IPSET_ADD_STORE_PLAIN_TIMEOUT;
}
-static inline int
+static int
bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
struct bitmap_ipmac *map)
{
return !test_and_clear_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
u32 id, size_t dsize)
{
@@ -197,7 +197,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether));
}
-static inline int
+static int
bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map)
{
return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 704a0dda1609..23d6095cb196 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -46,7 +46,7 @@ struct bitmap_port_adt_elem {
u16 id;
};
-static inline u16
+static u16
port_to_id(const struct bitmap_port *m, u16 port)
{
return port - m->first_port;
@@ -54,34 +54,34 @@ port_to_id(const struct bitmap_port *m, u16 port)
/* Common functions */
-static inline int
+static int
bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
const struct bitmap_port *map, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize)
{
return !!test_bit(id, map->members);
}
-static inline int
+static int
bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
struct bitmap_port *map, u32 flags, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
struct bitmap_port *map)
{
return !test_and_clear_bit(e->id, map->members);
}
-static inline int
+static int
bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
size_t dsize)
{
@@ -89,13 +89,40 @@ bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
htons(map->first_port + id));
}
-static inline int
+static int
bitmap_port_do_head(struct sk_buff *skb, const struct bitmap_port *map)
{
return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) ||
nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
}
+static bool
+ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
+{
+ bool ret;
+ u8 proto;
+
+ switch (pf) {
+ case NFPROTO_IPV4:
+ ret = ip_set_get_ip4_port(skb, src, port, &proto);
+ break;
+ case NFPROTO_IPV6:
+ ret = ip_set_get_ip6_port(skb, src, port, &proto);
+ break;
+ default:
+ return false;
+ }
+ if (!ret)
+ return ret;
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ return true;
+ default:
+ return false;
+ }
+}
+
static int
bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index d73d1828216a..169e0a04f814 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -35,7 +35,7 @@ struct ip_set_net {
static unsigned int ip_set_net_id __read_mostly;
-static inline struct ip_set_net *ip_set_pernet(struct net *net)
+static struct ip_set_net *ip_set_pernet(struct net *net)
{
return net_generic(net, ip_set_net_id);
}
@@ -67,13 +67,13 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
* serialized by ip_set_type_mutex.
*/
-static inline void
+static void
ip_set_type_lock(void)
{
mutex_lock(&ip_set_type_mutex);
}
-static inline void
+static void
ip_set_type_unlock(void)
{
mutex_unlock(&ip_set_type_mutex);
@@ -277,7 +277,7 @@ ip_set_free(void *members)
}
EXPORT_SYMBOL_GPL(ip_set_free);
-static inline bool
+static bool
flag_nested(const struct nlattr *nla)
{
return nla->nla_type & NLA_F_NESTED;
@@ -327,6 +327,83 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
}
EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
+static u32
+ip_set_timeout_get(const unsigned long *timeout)
+{
+ u32 t;
+
+ if (*timeout == IPSET_ELEM_PERMANENT)
+ return 0;
+
+ t = jiffies_to_msecs(*timeout - jiffies) / MSEC_PER_SEC;
+ /* Zero value in userspace means no timeout */
+ return t == 0 ? 1 : t;
+}
+
+static char *
+ip_set_comment_uget(struct nlattr *tb)
+{
+ return nla_data(tb);
+}
+
+/* Called from uadd only, protected by the set spinlock.
+ * The kadt functions don't use the comment extensions in any way.
+ */
+void
+ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
+ const struct ip_set_ext *ext)
+{
+ struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
+ size_t len = ext->comment ? strlen(ext->comment) : 0;
+
+ if (unlikely(c)) {
+ set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
+ }
+ if (!len)
+ return;
+ if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
+ len = IPSET_MAX_COMMENT_SIZE;
+ c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+ if (unlikely(!c))
+ return;
+ strlcpy(c->str, ext->comment, len + 1);
+ set->ext_size += sizeof(*c) + strlen(c->str) + 1;
+ rcu_assign_pointer(comment->c, c);
+}
+EXPORT_SYMBOL_GPL(ip_set_init_comment);
+
+/* Used only when dumping a set, protected by rcu_read_lock() */
+static int
+ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
+{
+ struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
+
+ if (!c)
+ return 0;
+ return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
+}
+
+/* Called from uadd/udel, flush or the garbage collectors protected
+ * by the set spinlock.
+ * Called when the set is destroyed and when there can't be any user
+ * of the set data anymore.
+ */
+static void
+ip_set_comment_free(struct ip_set *set, void *ptr)
+{
+ struct ip_set_comment *comment = ptr;
+ struct ip_set_comment_rcu *c;
+
+ c = rcu_dereference_protected(comment->c, 1);
+ if (unlikely(!c))
+ return;
+ set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
+}
+
typedef void (*destroyer)(struct ip_set *, void *);
/* ipset data extension types, in size order */
@@ -353,12 +430,12 @@ const struct ip_set_ext_type ip_set_extensions[] = {
.flag = IPSET_FLAG_WITH_COMMENT,
.len = sizeof(struct ip_set_comment),
.align = __alignof__(struct ip_set_comment),
- .destroy = (destroyer) ip_set_comment_free,
+ .destroy = ip_set_comment_free,
},
};
EXPORT_SYMBOL_GPL(ip_set_extensions);
-static inline bool
+static bool
add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
{
return ip_set_extensions[id].flag ?
@@ -448,6 +525,46 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
+static u64
+ip_set_get_bytes(const struct ip_set_counter *counter)
+{
+ return (u64)atomic64_read(&(counter)->bytes);
+}
+
+static u64
+ip_set_get_packets(const struct ip_set_counter *counter)
+{
+ return (u64)atomic64_read(&(counter)->packets);
+}
+
+static bool
+ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
+{
+ return nla_put_net64(skb, IPSET_ATTR_BYTES,
+ cpu_to_be64(ip_set_get_bytes(counter)),
+ IPSET_ATTR_PAD) ||
+ nla_put_net64(skb, IPSET_ATTR_PACKETS,
+ cpu_to_be64(ip_set_get_packets(counter)),
+ IPSET_ATTR_PAD);
+}
+
+static bool
+ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
+{
+ /* Send nonzero parameters only */
+ return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+ nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+ cpu_to_be64((u64)skbinfo->skbmark << 32 |
+ skbinfo->skbmarkmask),
+ IPSET_ATTR_PAD)) ||
+ (skbinfo->skbprio &&
+ nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+ cpu_to_be32(skbinfo->skbprio))) ||
+ (skbinfo->skbqueue &&
+ nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+ cpu_to_be16(skbinfo->skbqueue)));
+}
+
int
ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
const void *e, bool active)
@@ -473,6 +590,55 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
}
EXPORT_SYMBOL_GPL(ip_set_put_extensions);
+static bool
+ip_set_match_counter(u64 counter, u64 match, u8 op)
+{
+ switch (op) {
+ case IPSET_COUNTER_NONE:
+ return true;
+ case IPSET_COUNTER_EQ:
+ return counter == match;
+ case IPSET_COUNTER_NE:
+ return counter != match;
+ case IPSET_COUNTER_LT:
+ return counter < match;
+ case IPSET_COUNTER_GT:
+ return counter > match;
+ }
+ return false;
+}
+
+static void
+ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
+{
+ atomic64_add((long long)bytes, &(counter)->bytes);
+}
+
+static void
+ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
+{
+ atomic64_add((long long)packets, &(counter)->packets);
+}
+
+static void
+ip_set_update_counter(struct ip_set_counter *counter,
+ const struct ip_set_ext *ext, u32 flags)
+{
+ if (ext->packets != ULLONG_MAX &&
+ !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
+ ip_set_add_bytes(ext->bytes, counter);
+ ip_set_add_packets(ext->packets, counter);
+ }
+}
+
+static void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+ const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags)
+{
+ mext->skbinfo = *skbinfo;
+}
+
bool
ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags, void *data)
@@ -508,7 +674,7 @@ EXPORT_SYMBOL_GPL(ip_set_match_extensions);
* The set behind an index may change by swapping only, from userspace.
*/
-static inline void
+static void
__ip_set_get(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -516,7 +682,7 @@ __ip_set_get(struct ip_set *set)
write_unlock_bh(&ip_set_ref_lock);
}
-static inline void
+static void
__ip_set_put(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -528,7 +694,7 @@ __ip_set_put(struct ip_set *set)
/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
* a separate reference counter
*/
-static inline void
+static void
__ip_set_put_netlink(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
@@ -543,7 +709,7 @@ __ip_set_put_netlink(struct ip_set *set)
* so it can't be destroyed (or changed) under our foot.
*/
-static inline struct ip_set *
+static struct ip_set *
ip_set_rcu_get(struct net *net, ip_set_id_t index)
{
struct ip_set *set;
@@ -672,7 +838,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
*
*/
-static inline void
+static void
__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
{
struct ip_set *set;
@@ -1255,6 +1421,30 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
#define DUMP_TYPE(arg) (((u32)(arg)) & 0x0000FFFF)
#define DUMP_FLAGS(arg) (((u32)(arg)) >> 16)
+int
+ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
+{
+ u32 cadt_flags = 0;
+
+ if (SET_WITH_TIMEOUT(set))
+ if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+ htonl(set->timeout))))
+ return -EMSGSIZE;
+ if (SET_WITH_COUNTER(set))
+ cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
+ if (SET_WITH_COMMENT(set))
+ cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+ if (SET_WITH_SKBINFO(set))
+ cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
+ if (SET_WITH_FORCEADD(set))
+ cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
+
+ if (!cadt_flags)
+ return 0;
+ return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
+}
+EXPORT_SYMBOL_GPL(ip_set_put_flags);
+
static int
ip_set_dump_done(struct netlink_callback *cb)
{
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 2b8f959574b4..36615eb3eae1 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -148,31 +148,3 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
}
EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
#endif
-
-bool
-ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
-{
- bool ret;
- u8 proto;
-
- switch (pf) {
- case NFPROTO_IPV4:
- ret = ip_set_get_ip4_port(skb, src, port, &proto);
- break;
- case NFPROTO_IPV6:
- ret = ip_set_get_ip6_port(skb, src, port, &proto);
- break;
- default:
- return false;
- }
- if (!ret)
- return ret;
- switch (proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- return true;
- default:
- return false;
- }
-}
-EXPORT_SYMBOL_GPL(ip_set_get_ip_port);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index d098d87bc331..7480ce55b5c8 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -39,7 +39,7 @@
#ifdef IP_SET_HASH_WITH_MULTI
#define AHASH_MAX(h) ((h)->ahash_max)
-static inline u8
+static u8
tune_ahash_max(u8 curr, u32 multi)
{
u32 n;
@@ -909,7 +909,7 @@ out:
return ret;
}
-static inline int
+static int
mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index f4432d9fcad0..5d6d68eaf6a9 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -44,7 +44,7 @@ struct hash_ip4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ip4_data_equal(const struct hash_ip4_elem *e1,
const struct hash_ip4_elem *e2,
u32 *multi)
@@ -63,7 +63,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
{
next->ip = e->ip;
@@ -171,7 +171,7 @@ struct hash_ip6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
const struct hash_ip6_elem *ip2,
u32 *multi)
@@ -179,7 +179,7 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6);
}
-static inline void
+static void
hash_ip6_netmask(union nf_inet_addr *ip, u8 prefix)
{
ip6_netmask(ip, prefix);
@@ -196,7 +196,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e)
{
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
index 4ce563eb927d..eceb7bc4a93a 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmac.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -47,7 +47,7 @@ struct hash_ipmac4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1,
const struct hash_ipmac4_elem *e2,
u32 *multi)
@@ -67,7 +67,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmac4_data_next(struct hash_ipmac4_elem *next,
const struct hash_ipmac4_elem *e)
{
@@ -154,7 +154,7 @@ struct hash_ipmac6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1,
const struct hash_ipmac6_elem *e2,
u32 *multi)
@@ -175,7 +175,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmac6_data_next(struct hash_ipmac6_elem *next,
const struct hash_ipmac6_elem *e)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 7a1734aad0c5..aba1df617d6e 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -42,7 +42,7 @@ struct hash_ipmark4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1,
const struct hash_ipmark4_elem *ip2,
u32 *multi)
@@ -64,7 +64,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
const struct hash_ipmark4_elem *d)
{
@@ -165,7 +165,7 @@ struct hash_ipmark6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1,
const struct hash_ipmark6_elem *ip2,
u32 *multi)
@@ -187,7 +187,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipmark6_data_next(struct hash_ipmark6_elem *next,
const struct hash_ipmark6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 32e240658334..1ff228717e29 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -47,7 +47,7 @@ struct hash_ipport4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
const struct hash_ipport4_elem *ip2,
u32 *multi)
@@ -71,7 +71,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipport4_data_next(struct hash_ipport4_elem *next,
const struct hash_ipport4_elem *d)
{
@@ -202,7 +202,7 @@ struct hash_ipport6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
const struct hash_ipport6_elem *ip2,
u32 *multi)
@@ -226,7 +226,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipport6_data_next(struct hash_ipport6_elem *next,
const struct hash_ipport6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 15d419353179..fa88afd812fa 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -46,7 +46,7 @@ struct hash_ipportip4_elem {
u8 padding;
};
-static inline bool
+static bool
hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
const struct hash_ipportip4_elem *ip2,
u32 *multi)
@@ -72,7 +72,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
const struct hash_ipportip4_elem *d)
{
@@ -210,7 +210,7 @@ struct hash_ipportip6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
const struct hash_ipportip6_elem *ip2,
u32 *multi)
@@ -236,7 +236,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportip6_data_next(struct hash_ipportip6_elem *next,
const struct hash_ipportip6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7a4d7afd4121..eef6ecfcb409 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -59,7 +59,7 @@ struct hash_ipportnet4_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
const struct hash_ipportnet4_elem *ip2,
u32 *multi)
@@ -71,25 +71,25 @@ hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
{
elem->ip2 &= ip_set_netmask(cidr);
@@ -116,7 +116,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
const struct hash_ipportnet4_elem *d)
{
@@ -308,7 +308,7 @@ struct hash_ipportnet6_elem {
/* Common functions */
-static inline bool
+static bool
hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
const struct hash_ipportnet6_elem *ip2,
u32 *multi)
@@ -320,25 +320,25 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip2, cidr);
@@ -365,7 +365,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next,
const struct hash_ipportnet6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index d94c585d33c5..0b61593165ef 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -37,7 +37,7 @@ struct hash_mac4_elem {
/* Common functions */
-static inline bool
+static bool
hash_mac4_data_equal(const struct hash_mac4_elem *e1,
const struct hash_mac4_elem *e2,
u32 *multi)
@@ -45,7 +45,7 @@ hash_mac4_data_equal(const struct hash_mac4_elem *e1,
return ether_addr_equal(e1->ether, e2->ether);
}
-static inline bool
+static bool
hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
{
if (nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
@@ -56,7 +56,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_mac4_data_next(struct hash_mac4_elem *next,
const struct hash_mac4_elem *e)
{
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 3d932de0ad29..136cf0781d3a 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -47,7 +47,7 @@ struct hash_net4_elem {
/* Common functions */
-static inline bool
+static bool
hash_net4_data_equal(const struct hash_net4_elem *ip1,
const struct hash_net4_elem *ip2,
u32 *multi)
@@ -56,25 +56,25 @@ hash_net4_data_equal(const struct hash_net4_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_net4_do_data_match(const struct hash_net4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_net4_data_set_flags(struct hash_net4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_net4_data_reset_flags(struct hash_net4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
@@ -97,7 +97,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_net4_data_next(struct hash_net4_elem *next,
const struct hash_net4_elem *d)
{
@@ -212,7 +212,7 @@ struct hash_net6_elem {
/* Common functions */
-static inline bool
+static bool
hash_net6_data_equal(const struct hash_net6_elem *ip1,
const struct hash_net6_elem *ip2,
u32 *multi)
@@ -221,25 +221,25 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_net6_do_data_match(const struct hash_net6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_net6_data_set_flags(struct hash_net6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_net6_data_reset_flags(struct hash_net6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
@@ -262,7 +262,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_net6_data_next(struct hash_net6_elem *next,
const struct hash_net6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 87b29f971226..1a04e0929738 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -62,7 +62,7 @@ struct hash_netiface4_elem {
/* Common functions */
-static inline bool
+static bool
hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
const struct hash_netiface4_elem *ip2,
u32 *multi)
@@ -74,25 +74,25 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
strcmp(ip1->iface, ip2->iface) == 0;
}
-static inline int
+static int
hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
@@ -119,7 +119,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netiface4_data_next(struct hash_netiface4_elem *next,
const struct hash_netiface4_elem *d)
{
@@ -285,7 +285,7 @@ struct hash_netiface6_elem {
/* Common functions */
-static inline bool
+static bool
hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
const struct hash_netiface6_elem *ip2,
u32 *multi)
@@ -297,25 +297,25 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
strcmp(ip1->iface, ip2->iface) == 0;
}
-static inline int
+static int
hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
@@ -342,7 +342,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netiface6_data_next(struct hash_netiface6_elem *next,
const struct hash_netiface6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 4398322fad59..da4ef910b12d 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -52,7 +52,7 @@ struct hash_netnet4_elem {
/* Common functions */
-static inline bool
+static bool
hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
const struct hash_netnet4_elem *ip2,
u32 *multi)
@@ -61,32 +61,32 @@ hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
ip1->ccmp == ip2->ccmp;
}
-static inline int
+static int
hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
struct hash_netnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
{
if (inner) {
@@ -117,7 +117,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netnet4_data_next(struct hash_netnet4_elem *next,
const struct hash_netnet4_elem *d)
{
@@ -282,7 +282,7 @@ struct hash_netnet6_elem {
/* Common functions */
-static inline bool
+static bool
hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
const struct hash_netnet6_elem *ip2,
u32 *multi)
@@ -292,32 +292,32 @@ hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
ip1->ccmp == ip2->ccmp;
}
-static inline int
+static int
hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
-static inline void
+static void
hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
struct hash_netnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
{
if (inner) {
@@ -348,7 +348,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netnet6_data_next(struct hash_netnet6_elem *next,
const struct hash_netnet6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 799f2272cc65..34448df80fb9 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -57,7 +57,7 @@ struct hash_netport4_elem {
/* Common functions */
-static inline bool
+static bool
hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
const struct hash_netport4_elem *ip2,
u32 *multi)
@@ -68,25 +68,25 @@ hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_netport4_do_data_match(const struct hash_netport4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netport4_data_set_flags(struct hash_netport4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netport4_data_reset_flags(struct hash_netport4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
@@ -112,7 +112,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netport4_data_next(struct hash_netport4_elem *next,
const struct hash_netport4_elem *d)
{
@@ -270,7 +270,7 @@ struct hash_netport6_elem {
/* Common functions */
-static inline bool
+static bool
hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
const struct hash_netport6_elem *ip2,
u32 *multi)
@@ -281,25 +281,25 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
ip1->cidr == ip2->cidr;
}
-static inline int
+static int
hash_netport6_do_data_match(const struct hash_netport6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netport6_data_set_flags(struct hash_netport6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netport6_data_reset_flags(struct hash_netport6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
@@ -325,7 +325,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netport6_data_next(struct hash_netport6_elem *next,
const struct hash_netport6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index a82b70e8b9a6..934c1712cba8 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -56,7 +56,7 @@ struct hash_netportnet4_elem {
/* Common functions */
-static inline bool
+static bool
hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
const struct hash_netportnet4_elem *ip2,
u32 *multi)
@@ -67,32 +67,32 @@ hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
struct hash_netportnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
u8 cidr, bool inner)
{
@@ -126,7 +126,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
const struct hash_netportnet4_elem *d)
{
@@ -331,7 +331,7 @@ struct hash_netportnet6_elem {
/* Common functions */
-static inline bool
+static bool
hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
const struct hash_netportnet6_elem *ip2,
u32 *multi)
@@ -343,32 +343,32 @@ hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
ip1->proto == ip2->proto;
}
-static inline int
+static int
hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
-static inline void
+static void
hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
-static inline void
+static void
hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
-static inline void
+static void
hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
struct hash_netportnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
-static inline void
+static void
hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
u8 cidr, bool inner)
{
@@ -402,7 +402,7 @@ nla_put_failure:
return true;
}
-static inline void
+static void
hash_netportnet6_data_next(struct hash_netportnet6_elem *next,
const struct hash_netportnet6_elem *d)
{
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 67ac50104e6f..cd747c0962fd 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -149,7 +149,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
kfree(e);
}
-static inline void
+static void
list_set_del(struct ip_set *set, struct set_elem *e)
{
struct list_set *map = set->data;
@@ -160,7 +160,7 @@ list_set_del(struct ip_set *set, struct set_elem *e)
call_rcu(&e->rcu, __list_set_del_rcu);
}
-static inline void
+static void
list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
{
struct list_set *map = set->data;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 8b80ab794a92..512259f579d7 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2402,18 +2402,22 @@ estimator_fail:
return -ENOMEM;
}
-static void __net_exit __ip_vs_cleanup(struct net *net)
+static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
- ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */
- ip_vs_conn_net_cleanup(ipvs);
- ip_vs_app_net_cleanup(ipvs);
- ip_vs_protocol_net_cleanup(ipvs);
- ip_vs_control_net_cleanup(ipvs);
- ip_vs_estimator_net_cleanup(ipvs);
- IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
- net->ipvs = NULL;
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
+ ip_vs_service_nets_cleanup(net_list); /* ip_vs_flush() with locks */
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ ip_vs_conn_net_cleanup(ipvs);
+ ip_vs_app_net_cleanup(ipvs);
+ ip_vs_protocol_net_cleanup(ipvs);
+ ip_vs_control_net_cleanup(ipvs);
+ ip_vs_estimator_net_cleanup(ipvs);
+ IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
+ net->ipvs = NULL;
+ }
}
static int __net_init __ip_vs_dev_init(struct net *net)
@@ -2429,27 +2433,32 @@ hook_fail:
return ret;
}
-static void __net_exit __ip_vs_dev_cleanup(struct net *net)
+static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
EnterFunction(2);
- nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
- ipvs->enable = 0; /* Disable packet reception */
- smp_wmb();
- ip_vs_sync_net_cleanup(ipvs);
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+ ipvs->enable = 0; /* Disable packet reception */
+ smp_wmb();
+ ip_vs_sync_net_cleanup(ipvs);
+ }
LeaveFunction(2);
}
static struct pernet_operations ipvs_core_ops = {
.init = __ip_vs_init,
- .exit = __ip_vs_cleanup,
+ .exit_batch = __ip_vs_cleanup_batch,
.id = &ip_vs_net_id,
.size = sizeof(struct netns_ipvs),
};
static struct pernet_operations ipvs_core_dev_ops = {
.init = __ip_vs_dev_init,
- .exit = __ip_vs_dev_cleanup,
+ .exit_batch = __ip_vs_dev_cleanup_batch,
};
/*
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 3cccc88ef817..3be7398901e0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1607,14 +1607,20 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
/*
* Delete service by {netns} in the service table.
- * Called by __ip_vs_cleanup()
+ * Called by __ip_vs_batch_cleanup()
*/
-void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
+void ip_vs_service_nets_cleanup(struct list_head *net_list)
{
+ struct netns_ipvs *ipvs;
+ struct net *net;
+
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
- ip_vs_flush(ipvs, true);
+ list_for_each_entry(net, net_list, exit_list) {
+ ipvs = net_ipvs(net);
+ ip_vs_flush(ipvs, true);
+ }
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
}
diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c
index 78b074cd5464..c03066fdd5ca 100644
--- a/net/netfilter/ipvs/ip_vs_ovf.c
+++ b/net/netfilter/ipvs/ip_vs_ovf.c
@@ -5,7 +5,7 @@
* Authors: Raducu Deaconu <rhadoo_io@yahoo.com>
*
* Scheduler implements "overflow" loadbalancing according to number of active
- * connections , will keep all conections to the node with the highest weight
+ * connections , will keep all connections to the node with the highest weight
* and overflow to the next node if the number of connections exceeds the node's
* weight.
* Note that this scheduler might not be suitable for UDP because it only uses
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 888d3068a492..b1e300f8881b 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -407,12 +407,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
goto err_put;
skb_dst_drop(skb);
- if (noref) {
- if (!local)
- skb_dst_set_noref(skb, &rt->dst);
- else
- skb_dst_set(skb, dst_clone(&rt->dst));
- } else
+ if (noref)
+ skb_dst_set_noref(skb, &rt->dst);
+ else
skb_dst_set(skb, &rt->dst);
return local;
@@ -574,12 +571,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
goto err_put;
skb_dst_drop(skb);
- if (noref) {
- if (!local)
- skb_dst_set_noref(skb, &rt->dst);
- else
- skb_dst_set(skb, dst_clone(&rt->dst));
- } else
+ if (noref)
+ skb_dst_set_noref(skb, &rt->dst);
+ else
skb_dst_set(skb, &rt->dst);
return local;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5cd610b547e0..0af1898af2b8 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -573,7 +573,6 @@ EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
nf_ct_ext_destroy(tmpl);
- nf_ct_ext_free(tmpl);
if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
kfree((char *)tmpl - tmpl->proto.tmpl_padto);
@@ -1417,7 +1416,6 @@ void nf_conntrack_free(struct nf_conn *ct)
WARN_ON(atomic_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
- nf_ct_ext_free(ct);
kmem_cache_free(nf_conntrack_cachep, ct);
smp_mb__before_atomic();
atomic_dec(&net->ct.count);
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 6fba74b5aaf7..7956c9f19899 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -30,6 +30,7 @@
static DEFINE_MUTEX(nf_ct_ecache_mutex);
#define ECACHE_RETRY_WAIT (HZ/10)
+#define ECACHE_STACK_ALLOC (256 / sizeof(void *))
enum retry_state {
STATE_CONGESTED,
@@ -39,11 +40,11 @@ enum retry_state {
static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
{
- struct nf_conn *refs[16];
+ struct nf_conn *refs[ECACHE_STACK_ALLOC];
+ enum retry_state ret = STATE_DONE;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
unsigned int evicted = 0;
- enum retry_state ret = STATE_DONE;
spin_lock(&pcpu->lock);
@@ -54,10 +55,22 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
if (!nf_ct_is_confirmed(ct))
continue;
+ /* This ecache access is safe because the ct is on the
+ * pcpu dying list and we hold the spinlock -- the entry
+ * cannot be free'd until after the lock is released.
+ *
+ * This is true even if ct has a refcount of 0: the
+ * cpu that is about to free the entry must remove it
+ * from the dying list and needs the lock to do so.
+ */
e = nf_ct_ecache_find(ct);
if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
continue;
+ /* ct is in NFCT_ECACHE_DESTROY_FAIL state, this means
+ * the worker owns this entry: the ct will remain valid
+ * until the worker puts its ct reference.
+ */
if (nf_conntrack_event(IPCT_DESTROY, ct)) {
ret = STATE_CONGESTED;
break;
@@ -189,15 +202,15 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
if (notify == NULL)
goto out_unlock;
+ if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
+ goto out_unlock;
+
e = nf_ct_ecache_find(ct);
if (e == NULL)
goto out_unlock;
events = xchg(&e->cache, 0);
- if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
- goto out_unlock;
-
/* We make a copy of the missed event cache without taking
* the lock, thus we may send missed events twice. However,
* this does not harm and it happens very rarely. */
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index d4ed1e197921..c24e5b64b00c 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -34,21 +34,24 @@ void nf_ct_ext_destroy(struct nf_conn *ct)
t->destroy(ct);
rcu_read_unlock();
}
+
+ kfree(ct->ext);
}
EXPORT_SYMBOL(nf_ct_ext_destroy);
void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
{
unsigned int newlen, newoff, oldlen, alloc;
- struct nf_ct_ext *old, *new;
struct nf_ct_ext_type *t;
+ struct nf_ct_ext *new;
/* Conntrack must not be confirmed to avoid races on reallocation. */
WARN_ON(nf_ct_is_confirmed(ct));
- old = ct->ext;
- if (old) {
+ if (ct->ext) {
+ const struct nf_ct_ext *old = ct->ext;
+
if (__nf_ct_ext_exist(old, id))
return NULL;
oldlen = old->len;
@@ -68,22 +71,18 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
rcu_read_unlock();
alloc = max(newlen, NF_CT_EXT_PREALLOC);
- kmemleak_not_leak(old);
- new = __krealloc(old, alloc, gfp);
+ new = krealloc(ct->ext, alloc, gfp);
if (!new)
return NULL;
- if (!old) {
+ if (!ct->ext)
memset(new->offset, 0, sizeof(new->offset));
- ct->ext = new;
- } else if (new != old) {
- kfree_rcu(old, rcu);
- rcu_assign_pointer(ct->ext, new);
- }
new->offset[id] = newoff;
new->len = newlen;
memset((void *)new + newoff, 0, newlen - newoff);
+
+ ct->ext = new;
return (void *)new + newoff;
}
EXPORT_SYMBOL(nf_ct_ext_add);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e2d13cd18875..d8d33ef52ce0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -506,9 +506,45 @@ nla_put_failure:
return -1;
}
+/* all these functions access ct->ext. Caller must either hold a reference
+ * on ct or prevent its deletion by holding either the bucket spinlock or
+ * pcpu dying list lock.
+ */
+static int ctnetlink_dump_extinfo(struct sk_buff *skb,
+ struct nf_conn *ct, u32 type)
+{
+ if (ctnetlink_dump_acct(skb, ct, type) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0 ||
+ ctnetlink_dump_helpinfo(skb, ct) < 0 ||
+ ctnetlink_dump_labels(skb, ct) < 0 ||
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
+ ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
+{
+ if (ctnetlink_dump_status(skb, ct) < 0 ||
+ ctnetlink_dump_mark(skb, ct) < 0 ||
+ ctnetlink_dump_secctx(skb, ct) < 0 ||
+ ctnetlink_dump_id(skb, ct) < 0 ||
+ ctnetlink_dump_use(skb, ct) < 0 ||
+ ctnetlink_dump_master(skb, ct) < 0)
+ return -1;
+
+ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
+ (ctnetlink_dump_timeout(skb, ct) < 0 ||
+ ctnetlink_dump_protoinfo(skb, ct) < 0))
+ return -1;
+
+ return 0;
+}
+
static int
ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
- struct nf_conn *ct)
+ struct nf_conn *ct, bool extinfo)
{
const struct nf_conntrack_zone *zone;
struct nlmsghdr *nlh;
@@ -552,23 +588,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
NF_CT_DEFAULT_ZONE_DIR) < 0)
goto nla_put_failure;
- if (ctnetlink_dump_status(skb, ct) < 0 ||
- ctnetlink_dump_acct(skb, ct, type) < 0 ||
- ctnetlink_dump_timestamp(skb, ct) < 0 ||
- ctnetlink_dump_helpinfo(skb, ct) < 0 ||
- ctnetlink_dump_mark(skb, ct) < 0 ||
- ctnetlink_dump_secctx(skb, ct) < 0 ||
- ctnetlink_dump_labels(skb, ct) < 0 ||
- ctnetlink_dump_id(skb, ct) < 0 ||
- ctnetlink_dump_use(skb, ct) < 0 ||
- ctnetlink_dump_master(skb, ct) < 0 ||
- ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
- ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+ if (ctnetlink_dump_info(skb, ct) < 0)
goto nla_put_failure;
-
- if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
- (ctnetlink_dump_timeout(skb, ct) < 0 ||
- ctnetlink_dump_protoinfo(skb, ct) < 0))
+ if (extinfo && ctnetlink_dump_extinfo(skb, ct, type) < 0)
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -953,13 +975,11 @@ restart:
if (!ctnetlink_filter_match(ct, cb->data))
continue;
- rcu_read_lock();
res =
ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct);
- rcu_read_unlock();
+ ct, true);
if (res < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
@@ -1364,10 +1384,8 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
return -ENOMEM;
}
- rcu_read_lock();
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type), ct);
- rcu_read_unlock();
+ NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true);
nf_ct_put(ct);
if (err <= 0)
goto free;
@@ -1429,12 +1447,18 @@ restart:
continue;
cb->args[1] = 0;
}
- rcu_read_lock();
+
+ /* We can't dump extension info for the unconfirmed
+ * list because unconfirmed conntracks can have
+ * ct->ext reallocated (and thus freed).
+ *
+ * In the dying list case ct->ext can't be free'd
+ * until after we drop pcpu->lock.
+ */
res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct);
- rcu_read_unlock();
+ ct, dying ? true : false);
if (res < 0) {
if (!atomic_inc_not_zero(&ct->ct_general.use))
continue;
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 097deba7441a..c2e3dff773bc 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -235,11 +235,7 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
}
/* Need to track icmp error message? */
- if (icmph->type != ICMP_DEST_UNREACH &&
- icmph->type != ICMP_SOURCE_QUENCH &&
- icmph->type != ICMP_TIME_EXCEEDED &&
- icmph->type != ICMP_PARAMETERPROB &&
- icmph->type != ICMP_REDIRECT)
+ if (!icmp_is_err(icmph->type))
return NF_ACCEPT;
memset(&outer_daddr, 0, sizeof(outer_daddr));
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 712a428509ad..0d2243945f1d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -151,11 +151,64 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
}
}
+static int nft_netdev_register_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+ int err, j;
+
+ j = 0;
+ list_for_each_entry(hook, hook_list, list) {
+ err = nf_register_net_hook(net, &hook->ops);
+ if (err < 0)
+ goto err_register;
+
+ j++;
+ }
+ return 0;
+
+err_register:
+ list_for_each_entry(hook, hook_list, list) {
+ if (j-- <= 0)
+ break;
+
+ nf_unregister_net_hook(net, &hook->ops);
+ }
+ return err;
+}
+
+static void nft_netdev_unregister_hooks(struct net *net,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+
+ list_for_each_entry(hook, hook_list, list)
+ nf_unregister_net_hook(net, &hook->ops);
+}
+
+static int nft_register_basechain_hooks(struct net *net, int family,
+ struct nft_base_chain *basechain)
+{
+ if (family == NFPROTO_NETDEV)
+ return nft_netdev_register_hooks(net, &basechain->hook_list);
+
+ return nf_register_net_hook(net, &basechain->ops);
+}
+
+static void nft_unregister_basechain_hooks(struct net *net, int family,
+ struct nft_base_chain *basechain)
+{
+ if (family == NFPROTO_NETDEV)
+ nft_netdev_unregister_hooks(net, &basechain->hook_list);
+ else
+ nf_unregister_net_hook(net, &basechain->ops);
+}
+
static int nf_tables_register_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
{
- const struct nft_base_chain *basechain;
+ struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT ||
@@ -168,14 +221,14 @@ static int nf_tables_register_hook(struct net *net,
if (basechain->type->ops_register)
return basechain->type->ops_register(net, ops);
- return nf_register_net_hook(net, ops);
+ return nft_register_basechain_hooks(net, table->family, basechain);
}
static void nf_tables_unregister_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
{
- const struct nft_base_chain *basechain;
+ struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT ||
@@ -187,7 +240,7 @@ static void nf_tables_unregister_hook(struct net *net,
if (basechain->type->ops_unregister)
return basechain->type->ops_unregister(net, ops);
- nf_unregister_net_hook(net, ops);
+ nft_unregister_basechain_hooks(net, table->family, basechain);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@ -742,7 +795,8 @@ static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
if (cnt && i++ == cnt)
break;
- nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
+ nft_unregister_basechain_hooks(net, table->family,
+ nft_base_chain(chain));
}
}
@@ -757,14 +811,16 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table)
if (!nft_is_base_chain(chain))
continue;
- err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
+ err = nft_register_basechain_hooks(net, table->family,
+ nft_base_chain(chain));
if (err < 0)
- goto err;
+ goto err_register_hooks;
i++;
}
return 0;
-err:
+
+err_register_hooks:
if (i)
nft_table_disable(net, table, i);
return err;
@@ -1225,6 +1281,46 @@ nla_put_failure:
return -ENOSPC;
}
+static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
+ const struct nft_base_chain *basechain)
+{
+ const struct nf_hook_ops *ops = &basechain->ops;
+ struct nft_hook *hook, *first = NULL;
+ struct nlattr *nest, *nest_devs;
+ int n = 0;
+
+ nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+ goto nla_put_failure;
+
+ if (family == NFPROTO_NETDEV) {
+ nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS);
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (!first)
+ first = hook;
+
+ if (nla_put_string(skb, NFTA_DEVICE_NAME,
+ hook->ops.dev->name))
+ goto nla_put_failure;
+ n++;
+ }
+ nla_nest_end(skb, nest_devs);
+
+ if (n == 1 &&
+ nla_put_string(skb, NFTA_HOOK_DEV, first->ops.dev->name))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+
+ return 0;
+nla_put_failure:
+ return -1;
+}
+
static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq, int event, u32 flags,
int family, const struct nft_table *table,
@@ -1253,21 +1349,10 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
- const struct nf_hook_ops *ops = &basechain->ops;
struct nft_stats __percpu *stats;
- struct nlattr *nest;
- nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
- if (nest == NULL)
- goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
- goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+ if (nft_dump_basechain_hook(skb, family, basechain))
goto nla_put_failure;
- if (basechain->dev_name[0] &&
- nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name))
- goto nla_put_failure;
- nla_nest_end(skb, nest);
if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
htonl(basechain->policy)))
@@ -1485,6 +1570,7 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
static void nf_tables_chain_destroy(struct nft_ctx *ctx)
{
struct nft_chain *chain = ctx->chain;
+ struct nft_hook *hook, *next;
if (WARN_ON(chain->use > 0))
return;
@@ -1495,6 +1581,13 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
if (nft_is_base_chain(chain)) {
struct nft_base_chain *basechain = nft_base_chain(chain);
+ if (ctx->family == NFPROTO_NETDEV) {
+ list_for_each_entry_safe(hook, next,
+ &basechain->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+ }
module_put(basechain->type->owner);
if (rcu_access_pointer(basechain->stats)) {
static_branch_dec(&nft_counters_enabled);
@@ -1508,13 +1601,125 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
}
}
+static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
+ const struct nlattr *attr)
+{
+ struct net_device *dev;
+ char ifname[IFNAMSIZ];
+ struct nft_hook *hook;
+ int err;
+
+ hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
+ if (!hook) {
+ err = -ENOMEM;
+ goto err_hook_alloc;
+ }
+
+ nla_strlcpy(ifname, attr, IFNAMSIZ);
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev) {
+ err = -ENOENT;
+ goto err_hook_dev;
+ }
+ hook->ops.dev = dev;
+
+ return hook;
+
+err_hook_dev:
+ kfree(hook);
+err_hook_alloc:
+ return ERR_PTR(err);
+}
+
+static bool nft_hook_list_find(struct list_head *hook_list,
+ const struct nft_hook *this)
+{
+ struct nft_hook *hook;
+
+ list_for_each_entry(hook, hook_list, list) {
+ if (this->ops.dev == hook->ops.dev)
+ return true;
+ }
+
+ return false;
+}
+
+static int nf_tables_parse_netdev_hooks(struct net *net,
+ const struct nlattr *attr,
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook, *next;
+ const struct nlattr *tmp;
+ int rem, n = 0, err;
+
+ nla_for_each_nested(tmp, attr, rem) {
+ if (nla_type(tmp) != NFTA_DEVICE_NAME) {
+ err = -EINVAL;
+ goto err_hook;
+ }
+
+ hook = nft_netdev_hook_alloc(net, tmp);
+ if (IS_ERR(hook)) {
+ err = PTR_ERR(hook);
+ goto err_hook;
+ }
+ if (nft_hook_list_find(hook_list, hook)) {
+ err = -EEXIST;
+ goto err_hook;
+ }
+ list_add_tail(&hook->list, hook_list);
+ n++;
+
+ if (n == NFT_NETDEVICE_MAX) {
+ err = -EFBIG;
+ goto err_hook;
+ }
+ }
+ if (!n)
+ return -EINVAL;
+
+ return 0;
+
+err_hook:
+ list_for_each_entry_safe(hook, next, hook_list, list) {
+ list_del(&hook->list);
+ kfree(hook);
+ }
+ return err;
+}
+
struct nft_chain_hook {
u32 num;
s32 priority;
const struct nft_chain_type *type;
- struct net_device *dev;
+ struct list_head list;
};
+static int nft_chain_parse_netdev(struct net *net,
+ struct nlattr *tb[],
+ struct list_head *hook_list)
+{
+ struct nft_hook *hook;
+ int err;
+
+ if (tb[NFTA_HOOK_DEV]) {
+ hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]);
+ if (IS_ERR(hook))
+ return PTR_ERR(hook);
+
+ list_add_tail(&hook->list, hook_list);
+ } else if (tb[NFTA_HOOK_DEVS]) {
+ err = nf_tables_parse_netdev_hooks(net, tb[NFTA_HOOK_DEVS],
+ hook_list);
+ if (err < 0)
+ return err;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nft_chain_parse_hook(struct net *net,
const struct nlattr * const nla[],
struct nft_chain_hook *hook, u8 family,
@@ -1522,7 +1727,6 @@ static int nft_chain_parse_hook(struct net *net,
{
struct nlattr *ha[NFTA_HOOK_MAX + 1];
const struct nft_chain_type *type;
- struct net_device *dev;
int err;
lockdep_assert_held(&net->nft.commit_mutex);
@@ -1560,23 +1764,14 @@ static int nft_chain_parse_hook(struct net *net,
hook->type = type;
- hook->dev = NULL;
+ INIT_LIST_HEAD(&hook->list);
if (family == NFPROTO_NETDEV) {
- char ifname[IFNAMSIZ];
-
- if (!ha[NFTA_HOOK_DEV]) {
- module_put(type->owner);
- return -EOPNOTSUPP;
- }
-
- nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
- dev = __dev_get_by_name(net, ifname);
- if (!dev) {
+ err = nft_chain_parse_netdev(net, ha, &hook->list);
+ if (err < 0) {
module_put(type->owner);
- return -ENOENT;
+ return err;
}
- hook->dev = dev;
- } else if (ha[NFTA_HOOK_DEV]) {
+ } else if (ha[NFTA_HOOK_DEV] || ha[NFTA_HOOK_DEVS]) {
module_put(type->owner);
return -EOPNOTSUPP;
}
@@ -1586,6 +1781,12 @@ static int nft_chain_parse_hook(struct net *net,
static void nft_chain_release_hook(struct nft_chain_hook *hook)
{
+ struct nft_hook *h, *next;
+
+ list_for_each_entry_safe(h, next, &hook->list, list) {
+ list_del(&h->list);
+ kfree(h);
+ }
module_put(hook->type->owner);
}
@@ -1610,6 +1811,49 @@ static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *cha
return kvmalloc(alloc, GFP_KERNEL);
}
+static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
+ const struct nft_chain_hook *hook,
+ struct nft_chain *chain)
+{
+ ops->pf = family;
+ ops->hooknum = hook->num;
+ ops->priority = hook->priority;
+ ops->priv = chain;
+ ops->hook = hook->type->hooks[ops->hooknum];
+}
+
+static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
+ struct nft_chain_hook *hook, u32 flags)
+{
+ struct nft_chain *chain;
+ struct nft_hook *h;
+
+ basechain->type = hook->type;
+ INIT_LIST_HEAD(&basechain->hook_list);
+ chain = &basechain->chain;
+
+ if (family == NFPROTO_NETDEV) {
+ list_splice_init(&hook->list, &basechain->hook_list);
+ list_for_each_entry(h, &basechain->hook_list, list)
+ nft_basechain_hook_init(&h->ops, family, hook, chain);
+
+ basechain->ops.hooknum = hook->num;
+ basechain->ops.priority = hook->priority;
+ } else {
+ nft_basechain_hook_init(&basechain->ops, family, hook, chain);
+ }
+
+ chain->flags |= NFT_BASE_CHAIN | flags;
+ basechain->policy = NF_ACCEPT;
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+ nft_chain_offload_priority(basechain) < 0)
+ return -EOPNOTSUPP;
+
+ flow_block_init(&basechain->flow_block);
+
+ return 0;
+}
+
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
u8 policy, u32 flags)
{
@@ -1628,7 +1872,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_chain_hook hook;
- struct nf_hook_ops *ops;
err = nft_chain_parse_hook(net, nla, &hook, family, true);
if (err < 0)
@@ -1639,9 +1882,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
nft_chain_release_hook(&hook);
return -ENOMEM;
}
-
- if (hook.dev != NULL)
- strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
+ chain = &basechain->chain;
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
@@ -1654,24 +1895,12 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
static_branch_inc(&nft_counters_enabled);
}
- basechain->type = hook.type;
- chain = &basechain->chain;
-
- ops = &basechain->ops;
- ops->pf = family;
- ops->hooknum = hook.num;
- ops->priority = hook.priority;
- ops->priv = chain;
- ops->hook = hook.type->hooks[ops->hooknum];
- ops->dev = hook.dev;
-
- chain->flags |= NFT_BASE_CHAIN | flags;
- basechain->policy = NF_ACCEPT;
- if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
- nft_chain_offload_priority(basechain) < 0)
- return -EOPNOTSUPP;
-
- flow_block_init(&basechain->flow_block);
+ err = nft_basechain_init(basechain, family, &hook, flags);
+ if (err < 0) {
+ nft_chain_release_hook(&hook);
+ kfree(basechain);
+ return err;
+ }
} else {
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
@@ -1731,6 +1960,25 @@ err1:
return err;
}
+static bool nft_hook_list_equal(struct list_head *hook_list1,
+ struct list_head *hook_list2)
+{
+ struct nft_hook *hook;
+ int n = 0, m = 0;
+
+ n = 0;
+ list_for_each_entry(hook, hook_list2, list) {
+ if (!nft_hook_list_find(hook_list1, hook))
+ return false;
+
+ n++;
+ }
+ list_for_each_entry(hook, hook_list1, list)
+ m++;
+
+ return n == m;
+}
+
static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
u32 flags)
{
@@ -1762,12 +2010,19 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return -EBUSY;
}
- ops = &basechain->ops;
- if (ops->hooknum != hook.num ||
- ops->priority != hook.priority ||
- ops->dev != hook.dev) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
+ if (ctx->family == NFPROTO_NETDEV) {
+ if (!nft_hook_list_equal(&basechain->hook_list,
+ &hook.list)) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+ } else {
+ ops = &basechain->ops;
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
}
nft_chain_release_hook(&hook);
}
@@ -5626,43 +5881,6 @@ nft_flowtable_lookup_byhandle(const struct nft_table *table,
return ERR_PTR(-ENOENT);
}
-static int nf_tables_parse_devices(const struct nft_ctx *ctx,
- const struct nlattr *attr,
- struct net_device *dev_array[], int *len)
-{
- const struct nlattr *tmp;
- struct net_device *dev;
- char ifname[IFNAMSIZ];
- int rem, n = 0, err;
-
- nla_for_each_nested(tmp, attr, rem) {
- if (nla_type(tmp) != NFTA_DEVICE_NAME) {
- err = -EINVAL;
- goto err1;
- }
-
- nla_strlcpy(ifname, tmp, IFNAMSIZ);
- dev = __dev_get_by_name(ctx->net, ifname);
- if (!dev) {
- err = -ENOENT;
- goto err1;
- }
-
- dev_array[n++] = dev;
- if (n == NFT_FLOWTABLE_DEVICE_MAX) {
- err = -EFBIG;
- goto err1;
- }
- }
- if (!len)
- return -EINVAL;
-
- err = 0;
-err1:
- *len = n;
- return err;
-}
-
static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
[NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 },
[NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 },
@@ -5673,11 +5891,10 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
const struct nlattr *attr,
struct nft_flowtable *flowtable)
{
- struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
- struct nf_hook_ops *ops;
+ struct nft_hook *hook;
int hooknum, priority;
- int err, n = 0, i;
+ int err;
err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
nft_flowtable_hook_policy, NULL);
@@ -5695,27 +5912,21 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
- err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
- dev_array, &n);
+ err = nf_tables_parse_netdev_hooks(ctx->net,
+ tb[NFTA_FLOWTABLE_HOOK_DEVS],
+ &flowtable->hook_list);
if (err < 0)
return err;
- ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL);
- if (!ops)
- return -ENOMEM;
-
- flowtable->hooknum = hooknum;
- flowtable->priority = priority;
- flowtable->ops = ops;
- flowtable->ops_len = n;
+ flowtable->hooknum = hooknum;
+ flowtable->data.priority = priority;
- for (i = 0; i < n; i++) {
- flowtable->ops[i].pf = NFPROTO_NETDEV;
- flowtable->ops[i].hooknum = hooknum;
- flowtable->ops[i].priority = priority;
- flowtable->ops[i].priv = &flowtable->data;
- flowtable->ops[i].hook = flowtable->data.type->hook;
- flowtable->ops[i].dev = dev_array[i];
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ hook->ops.pf = NFPROTO_NETDEV;
+ hook->ops.hooknum = hooknum;
+ hook->ops.priority = priority;
+ hook->ops.priv = &flowtable->data;
+ hook->ops.hook = flowtable->data.type->hook;
}
return err;
@@ -5755,14 +5966,51 @@ nft_flowtable_type_get(struct net *net, u8 family)
static void nft_unregister_flowtable_net_hooks(struct net *net,
struct nft_flowtable *flowtable)
{
- int i;
+ struct nft_hook *hook;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (!flowtable->ops[i].dev)
- continue;
+ list_for_each_entry(hook, &flowtable->hook_list, list)
+ nf_unregister_net_hook(net, &hook->ops);
+}
- nf_unregister_net_hook(net, &flowtable->ops[i]);
+static int nft_register_flowtable_net_hooks(struct net *net,
+ struct nft_table *table,
+ struct nft_flowtable *flowtable)
+{
+ struct nft_hook *hook, *hook2, *next;
+ struct nft_flowtable *ft;
+ int err, i = 0;
+
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ list_for_each_entry(ft, &table->flowtables, list) {
+ list_for_each_entry(hook2, &ft->hook_list, list) {
+ if (hook->ops.dev == hook2->ops.dev &&
+ hook->ops.pf == hook2->ops.pf) {
+ err = -EBUSY;
+ goto err_unregister_net_hooks;
+ }
+ }
+ }
+
+ err = nf_register_net_hook(net, &hook->ops);
+ if (err < 0)
+ goto err_unregister_net_hooks;
+
+ i++;
}
+
+ return 0;
+
+err_unregister_net_hooks:
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ if (i-- <= 0)
+ break;
+
+ nf_unregister_net_hook(net, &hook->ops);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
+
+ return err;
}
static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
@@ -5773,12 +6021,13 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nf_flowtable_type *type;
- struct nft_flowtable *flowtable, *ft;
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
+ struct nft_flowtable *flowtable;
+ struct nft_hook *hook, *next;
struct nft_table *table;
struct nft_ctx ctx;
- int err, i, k;
+ int err;
if (!nla[NFTA_FLOWTABLE_TABLE] ||
!nla[NFTA_FLOWTABLE_NAME] ||
@@ -5817,6 +6066,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
flowtable->table = table;
flowtable->handle = nf_tables_alloc_handle(table);
+ INIT_LIST_HEAD(&flowtable->hook_list);
flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
if (!flowtable->name) {
@@ -5840,43 +6090,24 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
if (err < 0)
goto err4;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (!flowtable->ops[i].dev)
- continue;
-
- list_for_each_entry(ft, &table->flowtables, list) {
- for (k = 0; k < ft->ops_len; k++) {
- if (!ft->ops[k].dev)
- continue;
-
- if (flowtable->ops[i].dev == ft->ops[k].dev &&
- flowtable->ops[i].pf == ft->ops[k].pf) {
- err = -EBUSY;
- goto err5;
- }
- }
- }
-
- err = nf_register_net_hook(net, &flowtable->ops[i]);
- if (err < 0)
- goto err5;
- }
+ err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
+ if (err < 0)
+ goto err4;
err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
- goto err6;
+ goto err5;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
table->use++;
return 0;
-err6:
- i = flowtable->ops_len;
err5:
- for (k = i - 1; k >= 0; k--)
- nf_unregister_net_hook(net, &flowtable->ops[k]);
-
- kfree(flowtable->ops);
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ nf_unregister_net_hook(net, &hook->ops);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
+ }
err4:
flowtable->data.type->free(&flowtable->data);
err3:
@@ -5943,8 +6174,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
{
struct nlattr *nest, *nest_devs;
struct nfgenmsg *nfmsg;
+ struct nft_hook *hook;
struct nlmsghdr *nlh;
- int i;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
@@ -5967,18 +6198,15 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
if (!nest)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
- nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
+ nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->data.priority)))
goto nla_put_failure;
nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS);
if (!nest_devs)
goto nla_put_failure;
- for (i = 0; i < flowtable->ops_len; i++) {
- const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev);
-
- if (dev &&
- nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
+ list_for_each_entry_rcu(hook, &flowtable->hook_list, list) {
+ if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest_devs);
@@ -6169,7 +6397,12 @@ err:
static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
- kfree(flowtable->ops);
+ struct nft_hook *hook, *next;
+
+ list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ list_del_rcu(&hook->list);
+ kfree(hook);
+ }
kfree(flowtable->name);
flowtable->data.type->free(&flowtable->data);
module_put(flowtable->data.type->owner);
@@ -6209,14 +6442,15 @@ nla_put_failure:
static void nft_flowtable_event(unsigned long event, struct net_device *dev,
struct nft_flowtable *flowtable)
{
- int i;
+ struct nft_hook *hook;
- for (i = 0; i < flowtable->ops_len; i++) {
- if (flowtable->ops[i].dev != dev)
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
+ if (hook->ops.dev != dev)
continue;
- nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
- flowtable->ops[i].dev = NULL;
+ nf_unregister_net_hook(dev_net(dev), &hook->ops);
+ list_del_rcu(&hook->list);
+ kfree_rcu(hook, rcu);
break;
}
}
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index e25dab8128db..cdea3010c7a0 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -132,13 +132,13 @@ static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
common->extack = extack;
}
-static int nft_setup_cb_call(struct nft_base_chain *basechain,
- enum tc_setup_type type, void *type_data)
+static int nft_setup_cb_call(enum tc_setup_type type, void *type_data,
+ struct list_head *cb_list)
{
struct flow_block_cb *block_cb;
int err;
- list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) {
+ list_for_each_entry(block_cb, cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err < 0)
return err;
@@ -155,32 +155,44 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain)
return 0;
}
+static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
+ const struct nft_base_chain *basechain,
+ const struct nft_rule *rule,
+ const struct nft_flow_rule *flow,
+ enum flow_cls_command command)
+{
+ struct netlink_ext_ack extack;
+ __be16 proto = ETH_P_ALL;
+
+ memset(cls_flow, 0, sizeof(*cls_flow));
+
+ if (flow)
+ proto = flow->proto;
+
+ nft_flow_offload_common_init(&cls_flow->common, proto,
+ basechain->ops.priority, &extack);
+ cls_flow->command = command;
+ cls_flow->cookie = (unsigned long) rule;
+ if (flow)
+ cls_flow->rule = flow->rule;
+}
+
static int nft_flow_offload_rule(struct nft_chain *chain,
struct nft_rule *rule,
struct nft_flow_rule *flow,
enum flow_cls_command command)
{
- struct flow_cls_offload cls_flow = {};
+ struct flow_cls_offload cls_flow;
struct nft_base_chain *basechain;
- struct netlink_ext_ack extack;
- __be16 proto = ETH_P_ALL;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
+ nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, command);
- if (flow)
- proto = flow->proto;
-
- nft_flow_offload_common_init(&cls_flow.common, proto,
- basechain->ops.priority, &extack);
- cls_flow.command = command;
- cls_flow.cookie = (unsigned long) rule;
- if (flow)
- cls_flow.rule = flow->rule;
-
- return nft_setup_cb_call(basechain, TC_SETUP_CLSFLOWER, &cls_flow);
+ return nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow,
+ &basechain->flow_block.cb_list);
}
static int nft_flow_offload_bind(struct flow_block_offload *bo,
@@ -194,6 +206,16 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
struct nft_base_chain *basechain)
{
struct flow_block_cb *block_cb, *next;
+ struct flow_cls_offload cls_flow;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ chain = &basechain->chain;
+ list_for_each_entry(rule, &chain->rules, list) {
+ nft_flow_cls_offload_setup(&cls_flow, basechain, rule, NULL,
+ FLOW_CLS_DESTROY);
+ nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &bo->cb_list);
+ }
list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
list_del(&block_cb->list);
@@ -224,20 +246,30 @@ static int nft_block_setup(struct nft_base_chain *basechain,
return err;
}
+static void nft_flow_block_offload_init(struct flow_block_offload *bo,
+ struct net *net,
+ enum flow_block_command cmd,
+ struct nft_base_chain *basechain,
+ struct netlink_ext_ack *extack)
+{
+ memset(bo, 0, sizeof(*bo));
+ bo->net = net;
+ bo->block = &basechain->flow_block;
+ bo->command = cmd;
+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ bo->extack = extack;
+ INIT_LIST_HEAD(&bo->cb_list);
+}
+
static int nft_block_offload_cmd(struct nft_base_chain *chain,
struct net_device *dev,
enum flow_block_command cmd)
{
struct netlink_ext_ack extack = {};
- struct flow_block_offload bo = {};
+ struct flow_block_offload bo;
int err;
- bo.net = dev_net(dev);
- bo.block = &chain->flow_block;
- bo.command = cmd;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
+ nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
if (err < 0)
@@ -253,17 +285,12 @@ static void nft_indr_block_ing_cmd(struct net_device *dev,
enum flow_block_command cmd)
{
struct netlink_ext_ack extack = {};
- struct flow_block_offload bo = {};
+ struct flow_block_offload bo;
if (!chain)
return;
- bo.net = dev_net(dev);
- bo.block = &chain->flow_block;
- bo.command = cmd;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
+ nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
@@ -274,15 +301,10 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
struct net_device *dev,
enum flow_block_command cmd)
{
- struct flow_block_offload bo = {};
struct netlink_ext_ack extack = {};
+ struct flow_block_offload bo;
- bo.net = dev_net(dev);
- bo.block = &chain->flow_block;
- bo.command = cmd;
- bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
- bo.extack = &extack;
- INIT_LIST_HEAD(&bo.cb_list);
+ nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
flow_indr_block_call(dev, &bo, cmd);
@@ -294,32 +316,73 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
-static int nft_flow_offload_chain(struct nft_chain *chain,
- u8 *ppolicy,
+static int nft_chain_offload_cmd(struct nft_base_chain *basechain,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ int err;
+
+ if (dev->netdev_ops->ndo_setup_tc)
+ err = nft_block_offload_cmd(basechain, dev, cmd);
+ else
+ err = nft_indr_block_offload_cmd(basechain, dev, cmd);
+
+ return err;
+}
+
+static int nft_flow_block_chain(struct nft_base_chain *basechain,
+ const struct net_device *this_dev,
+ enum flow_block_command cmd)
+{
+ struct net_device *dev;
+ struct nft_hook *hook;
+ int err, i = 0;
+
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ dev = hook->ops.dev;
+ if (this_dev && this_dev != dev)
+ continue;
+
+ err = nft_chain_offload_cmd(basechain, dev, cmd);
+ if (err < 0 && cmd == FLOW_BLOCK_BIND) {
+ if (!this_dev)
+ goto err_flow_block;
+
+ return err;
+ }
+ i++;
+ }
+
+ return 0;
+
+err_flow_block:
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (i-- <= 0)
+ break;
+
+ dev = hook->ops.dev;
+ nft_chain_offload_cmd(basechain, dev, FLOW_BLOCK_UNBIND);
+ }
+ return err;
+}
+
+static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy,
enum flow_block_command cmd)
{
struct nft_base_chain *basechain;
- struct net_device *dev;
u8 policy;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
- dev = basechain->ops.dev;
- if (!dev)
- return -EOPNOTSUPP;
-
policy = ppolicy ? *ppolicy : basechain->policy;
/* Only default policy to accept is supported for now. */
if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP)
return -EOPNOTSUPP;
- if (dev->netdev_ops->ndo_setup_tc)
- return nft_block_offload_cmd(basechain, dev, cmd);
- else
- return nft_indr_block_offload_cmd(basechain, dev, cmd);
+ return nft_flow_block_chain(basechain, NULL, cmd);
}
int nft_flow_rule_offload_commit(struct net *net)
@@ -386,6 +449,7 @@ static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
{
struct nft_base_chain *basechain;
struct net *net = dev_net(dev);
+ struct nft_hook *hook, *found;
const struct nft_table *table;
struct nft_chain *chain;
@@ -398,8 +462,16 @@ static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
!(chain->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
+ found = NULL;
basechain = nft_base_chain(chain);
- if (strncmp(basechain->dev_name, dev->name, IFNAMSIZ))
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.dev != dev)
+ continue;
+
+ found = hook;
+ break;
+ }
+ if (!found)
continue;
return chain;
@@ -427,18 +499,6 @@ static void nft_indr_block_cb(struct net_device *dev,
mutex_unlock(&net->nft.commit_mutex);
}
-static void nft_offload_chain_clean(struct nft_chain *chain)
-{
- struct nft_rule *rule;
-
- list_for_each_entry(rule, &chain->rules, list) {
- nft_flow_offload_rule(chain, rule,
- NULL, FLOW_CLS_DESTROY);
- }
-
- nft_flow_offload_chain(chain, NULL, FLOW_BLOCK_UNBIND);
-}
-
static int nft_offload_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -449,7 +509,9 @@ static int nft_offload_netdev_event(struct notifier_block *this,
mutex_lock(&net->nft.commit_mutex);
chain = __nft_offload_get_chain(dev);
if (chain)
- nft_offload_chain_clean(chain);
+ nft_flow_block_chain(nft_base_chain(chain), dev,
+ FLOW_BLOCK_UNBIND);
+
mutex_unlock(&net->nft.commit_mutex);
return NOTIFY_DONE;
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index b5d5d071d765..c78d01bc02e9 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -287,28 +287,35 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
struct nft_ctx *ctx)
{
struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
+ struct nft_hook *hook, *found = NULL;
+ int n = 0;
- switch (event) {
- case NETDEV_UNREGISTER:
- if (strcmp(basechain->dev_name, dev->name) != 0)
- return;
-
- /* UNREGISTER events are also happpening on netns exit.
- *
- * Altough nf_tables core releases all tables/chains, only
- * this event handler provides guarantee that
- * basechain.ops->dev is still accessible, so we cannot
- * skip exiting net namespaces.
- */
- __nft_release_basechain(ctx);
- break;
- case NETDEV_CHANGENAME:
- if (dev->ifindex != basechain->ops.dev->ifindex)
- return;
+ if (event != NETDEV_UNREGISTER)
+ return;
- strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
- break;
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.dev == dev)
+ found = hook;
+
+ n++;
}
+ if (!found)
+ return;
+
+ if (n > 1) {
+ nf_unregister_net_hook(ctx->net, &found->ops);
+ list_del_rcu(&found->list);
+ kfree_rcu(found, rcu);
+ return;
+ }
+
+ /* UNREGISTER events are also happening on netns exit.
+ *
+ * Although nf_tables core releases all tables/chains, only this event
+ * handler provides guarantee that hook->ops.dev is still accessible,
+ * so we cannot skip exiting net namespaces.
+ */
+ __nft_release_basechain(ctx);
}
static int nf_tables_netdev_event(struct notifier_block *this,
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index be7798a50546..713fb38541df 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -239,11 +239,7 @@ static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
return 0;
/* Error message? */
- if (icmph->type != ICMP_DEST_UNREACH &&
- icmph->type != ICMP_SOURCE_QUENCH &&
- icmph->type != ICMP_TIME_EXCEEDED &&
- icmph->type != ICMP_PARAMETERPROB &&
- icmph->type != ICMP_REDIRECT)
+ if (!icmp_is_err(icmph->type))
return 0;
*nhoff += iphsz + sizeof(_ih);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index efccd1ac9a66..0522b2b1fd95 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -458,10 +458,63 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
}
EXPORT_SYMBOL(genlmsg_put);
+static struct genl_dumpit_info *genl_dumpit_info_alloc(void)
+{
+ return kmalloc(sizeof(struct genl_dumpit_info), GFP_KERNEL);
+}
+
+static void genl_dumpit_info_free(const struct genl_dumpit_info *info)
+{
+ kfree(info);
+}
+
+static struct nlattr **
+genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ const struct genl_ops *ops,
+ int hdrlen,
+ enum genl_validate_flags no_strict_flag,
+ bool parallel)
+{
+ enum netlink_validation validate = ops->validate & no_strict_flag ?
+ NL_VALIDATE_LIBERAL :
+ NL_VALIDATE_STRICT;
+ struct nlattr **attrbuf;
+ int err;
+
+ if (!family->maxattr)
+ return NULL;
+
+ if (parallel) {
+ attrbuf = kmalloc_array(family->maxattr + 1,
+ sizeof(struct nlattr *), GFP_KERNEL);
+ if (!attrbuf)
+ return ERR_PTR(-ENOMEM);
+ } else {
+ attrbuf = family->attrbuf;
+ }
+
+ err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
+ family->policy, validate, extack);
+ if (err && parallel) {
+ kfree(attrbuf);
+ return ERR_PTR(err);
+ }
+ return attrbuf;
+}
+
+static void genl_family_rcv_msg_attrs_free(const struct genl_family *family,
+ struct nlattr **attrbuf,
+ bool parallel)
+{
+ if (parallel)
+ kfree(attrbuf);
+}
+
static int genl_lock_start(struct netlink_callback *cb)
{
- /* our ops are always const - netlink API doesn't propagate that */
- const struct genl_ops *ops = cb->data;
+ const struct genl_ops *ops = genl_dumpit_info(cb)->ops;
int rc = 0;
if (ops->start) {
@@ -474,8 +527,7 @@ static int genl_lock_start(struct netlink_callback *cb)
static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
- /* our ops are always const - netlink API doesn't propagate that */
- const struct genl_ops *ops = cb->data;
+ const struct genl_ops *ops = genl_dumpit_info(cb)->ops;
int rc;
genl_lock();
@@ -486,8 +538,8 @@ static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
static int genl_lock_done(struct netlink_callback *cb)
{
- /* our ops are always const - netlink API doesn't propagate that */
- const struct genl_ops *ops = cb->data;
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ const struct genl_ops *ops = info->ops;
int rc = 0;
if (ops->done) {
@@ -495,120 +547,111 @@ static int genl_lock_done(struct netlink_callback *cb)
rc = ops->done(cb);
genl_unlock();
}
+ genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
+ genl_dumpit_info_free(info);
return rc;
}
-static int genl_family_rcv_msg(const struct genl_family *family,
- struct sk_buff *skb,
- struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+static int genl_parallel_done(struct netlink_callback *cb)
{
- const struct genl_ops *ops;
- struct net *net = sock_net(skb->sk);
- struct genl_info info;
- struct genlmsghdr *hdr = nlmsg_data(nlh);
- struct nlattr **attrbuf;
- int hdrlen, err;
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ const struct genl_ops *ops = info->ops;
+ int rc = 0;
- /* this family doesn't exist in this netns */
- if (!family->netnsok && !net_eq(net, &init_net))
- return -ENOENT;
+ if (ops->done)
+ rc = ops->done(cb);
+ genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
+ genl_dumpit_info_free(info);
+ return rc;
+}
- hdrlen = GENL_HDRLEN + family->hdrsize;
- if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
- return -EINVAL;
+static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
+ struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ const struct genl_ops *ops,
+ int hdrlen, struct net *net)
+{
+ struct genl_dumpit_info *info;
+ struct nlattr **attrs = NULL;
+ int err;
- ops = genl_get_cmd(hdr->cmd, family);
- if (ops == NULL)
+ if (!ops->dumpit)
return -EOPNOTSUPP;
- if ((ops->flags & GENL_ADMIN_PERM) &&
- !netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if ((ops->flags & GENL_UNS_ADMIN_PERM) &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
- if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
- int rc;
-
- if (ops->dumpit == NULL)
- return -EOPNOTSUPP;
-
- if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) {
- int hdrlen = GENL_HDRLEN + family->hdrsize;
-
- if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
- return -EINVAL;
+ if (ops->validate & GENL_DONT_VALIDATE_DUMP)
+ goto no_attrs;
- if (family->maxattr) {
- unsigned int validate = NL_VALIDATE_STRICT;
-
- if (ops->validate &
- GENL_DONT_VALIDATE_DUMP_STRICT)
- validate = NL_VALIDATE_LIBERAL;
- rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen),
- nlmsg_attrlen(nlh, hdrlen),
- family->maxattr,
- family->policy,
- validate, extack);
- if (rc)
- return rc;
- }
- }
+ if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+ return -EINVAL;
- if (!family->parallel_ops) {
- struct netlink_dump_control c = {
- .module = family->module,
- /* we have const, but the netlink API doesn't */
- .data = (void *)ops,
- .start = genl_lock_start,
- .dump = genl_lock_dumpit,
- .done = genl_lock_done,
- };
+ attrs = genl_family_rcv_msg_attrs_parse(family, nlh, extack,
+ ops, hdrlen,
+ GENL_DONT_VALIDATE_DUMP_STRICT,
+ true);
+ if (IS_ERR(attrs))
+ return PTR_ERR(attrs);
+
+no_attrs:
+ /* Allocate dumpit info. It is going to be freed by done() callback. */
+ info = genl_dumpit_info_alloc();
+ if (!info) {
+ genl_family_rcv_msg_attrs_free(family, attrs, true);
+ return -ENOMEM;
+ }
- genl_unlock();
- rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
- genl_lock();
+ info->family = family;
+ info->ops = ops;
+ info->attrs = attrs;
- } else {
- struct netlink_dump_control c = {
- .module = family->module,
- .start = ops->start,
- .dump = ops->dumpit,
- .done = ops->done,
- };
+ if (!family->parallel_ops) {
+ struct netlink_dump_control c = {
+ .module = family->module,
+ .data = info,
+ .start = genl_lock_start,
+ .dump = genl_lock_dumpit,
+ .done = genl_lock_done,
+ };
- rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
- }
+ genl_unlock();
+ err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
+ genl_lock();
- return rc;
+ } else {
+ struct netlink_dump_control c = {
+ .module = family->module,
+ .data = info,
+ .start = ops->start,
+ .dump = ops->dumpit,
+ .done = genl_parallel_done,
+ };
+
+ err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
}
- if (ops->doit == NULL)
- return -EOPNOTSUPP;
-
- if (family->maxattr && family->parallel_ops) {
- attrbuf = kmalloc_array(family->maxattr + 1,
- sizeof(struct nlattr *),
- GFP_KERNEL);
- if (attrbuf == NULL)
- return -ENOMEM;
- } else
- attrbuf = family->attrbuf;
+ return err;
+}
- if (attrbuf) {
- enum netlink_validation validate = NL_VALIDATE_STRICT;
+static int genl_family_rcv_msg_doit(const struct genl_family *family,
+ struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ const struct genl_ops *ops,
+ int hdrlen, struct net *net)
+{
+ struct nlattr **attrbuf;
+ struct genl_info info;
+ int err;
- if (ops->validate & GENL_DONT_VALIDATE_STRICT)
- validate = NL_VALIDATE_LIBERAL;
+ if (!ops->doit)
+ return -EOPNOTSUPP;
- err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
- family->policy, validate, extack);
- if (err < 0)
- goto out;
- }
+ attrbuf = genl_family_rcv_msg_attrs_parse(family, nlh, extack,
+ ops, hdrlen,
+ GENL_DONT_VALIDATE_STRICT,
+ family->parallel_ops);
+ if (IS_ERR(attrbuf))
+ return PTR_ERR(attrbuf);
info.snd_seq = nlh->nlmsg_seq;
info.snd_portid = NETLINK_CB(skb).portid;
@@ -632,12 +675,49 @@ static int genl_family_rcv_msg(const struct genl_family *family,
family->post_doit(ops, skb, &info);
out:
- if (family->parallel_ops)
- kfree(attrbuf);
+ genl_family_rcv_msg_attrs_free(family, attrbuf, family->parallel_ops);
return err;
}
+static int genl_family_rcv_msg(const struct genl_family *family,
+ struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ const struct genl_ops *ops;
+ struct net *net = sock_net(skb->sk);
+ struct genlmsghdr *hdr = nlmsg_data(nlh);
+ int hdrlen;
+
+ /* this family doesn't exist in this netns */
+ if (!family->netnsok && !net_eq(net, &init_net))
+ return -ENOENT;
+
+ hdrlen = GENL_HDRLEN + family->hdrsize;
+ if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+ return -EINVAL;
+
+ ops = genl_get_cmd(hdr->cmd, family);
+ if (ops == NULL)
+ return -EOPNOTSUPP;
+
+ if ((ops->flags & GENL_ADMIN_PERM) &&
+ !netlink_capable(skb, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if ((ops->flags & GENL_UNS_ADMIN_PERM) &&
+ !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP)
+ return genl_family_rcv_msg_dumpit(family, skb, nlh, extack,
+ ops, hdrlen, net);
+ else
+ return genl_family_rcv_msg_doit(family, skb, nlh, extack,
+ ops, hdrlen, net);
+}
+
static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -1088,25 +1168,6 @@ problem:
subsys_initcall(genl_init);
-/**
- * genl_family_attrbuf - return family's attrbuf
- * @family: the family
- *
- * Return the family's attrbuf, while validating that it's
- * actually valid to access it.
- *
- * You cannot use this function with a family that has parallel_ops
- * and you can only use it within (pre/post) doit/dumpit callbacks.
- */
-struct nlattr **genl_family_attrbuf(const struct genl_family *family)
-{
- if (!WARN_ON(family->parallel_ops))
- lockdep_assert_held(&genl_mutex);
-
- return family->attrbuf;
-}
-EXPORT_SYMBOL(genl_family_attrbuf);
-
static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
gfp_t flags)
{
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index afde0d763039..eee0dddb7749 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -102,22 +102,14 @@ nla_put_failure:
static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
{
- struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family);
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
struct nfc_dev *dev;
- int rc;
u32 idx;
- rc = nlmsg_parse_deprecated(cb->nlh,
- GENL_HDRLEN + nfc_genl_family.hdrsize,
- attrbuf, nfc_genl_family.maxattr,
- nfc_genl_policy, NULL);
- if (rc < 0)
- return ERR_PTR(rc);
-
- if (!attrbuf[NFC_ATTR_DEVICE_INDEX])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
return ERR_PTR(-EINVAL);
- idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]);
+ idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
dev = nfc_get_device(idx);
if (!dev)
@@ -1695,7 +1687,8 @@ static const struct genl_ops nfc_genl_ops[] = {
},
{
.cmd = NFC_CMD_GET_TARGET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = nfc_genl_dump_targets,
.done = nfc_genl_dump_targets_done,
},
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 1c77f520f474..12936c151cc0 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -200,7 +200,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
if (err)
return err;
- flow_key->mpls.top_lse = lse;
+ flow_key->mpls.lse[0] = lse;
return 0;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 05249eb45082..df9c80bf621d 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -971,6 +971,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
+ bool add_helper = false;
+
/* Packets starting a new connection must be NATted before the
* helper, so that the helper knows about the NAT. We enforce
* this by delaying both NAT and helper calls for unconfirmed
@@ -988,16 +990,17 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
}
/* Userspace may decide to perform a ct lookup without a helper
- * specified followed by a (recirculate and) commit with one.
- * Therefore, for unconfirmed connections which we will commit,
- * we need to attach the helper here.
+ * specified followed by a (recirculate and) commit with one,
+ * or attach a helper in a later commit. Therefore, for
+ * connections which we will commit, we may need to attach
+ * the helper here.
*/
- if (!nf_ct_is_confirmed(ct) && info->commit &&
- info->helper && !nfct_help(ct)) {
+ if (info->commit && info->helper && !nfct_help(ct)) {
int err = __nf_ct_try_assign_helper(ct, info->ct,
GFP_ATOMIC);
if (err)
return err;
+ add_helper = true;
/* helper installed, add seqadj if NAT is required */
if (info->nat && !nfct_seqadj(ct)) {
@@ -1007,11 +1010,13 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
}
/* Call the helper only if:
- * - nf_conntrack_in() was executed above ("!cached") for a
- * confirmed connection, or
+ * - nf_conntrack_in() was executed above ("!cached") or a
+ * helper was just attached ("add_helper") for a confirmed
+ * connection, or
* - When committing an unconfirmed connection.
*/
- if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) &&
+ if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
+ info->commit) &&
ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
return -EINVAL;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d8c364d637b1..2088619c03f0 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -227,7 +227,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
stats = this_cpu_ptr(dp->stats_percpu);
/* Look up flow. */
- flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
+ flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
+ &n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
@@ -1575,6 +1576,31 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
return 0;
}
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+ dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+ if (!dp->stats_percpu)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
+{
+ int i;
+
+ dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!dp->ports)
+ return -ENOMEM;
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+ INIT_HLIST_HEAD(&dp->ports[i]);
+
+ return 0;
+}
+
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
@@ -1583,7 +1609,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct vport *vport;
struct ovs_net *ovs_net;
- int err, i;
+ int err;
err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
@@ -1596,35 +1622,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_free_reply;
+ goto err_destroy_reply;
ovs_dp_set_net(dp, sock_net(skb->sk));
/* Allocate table. */
err = ovs_flow_tbl_init(&dp->table);
if (err)
- goto err_free_dp;
+ goto err_destroy_dp;
- dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
- if (!dp->stats_percpu) {
- err = -ENOMEM;
+ err = ovs_dp_stats_init(dp);
+ if (err)
goto err_destroy_table;
- }
-
- dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!dp->ports) {
- err = -ENOMEM;
- goto err_destroy_percpu;
- }
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&dp->ports[i]);
+ err = ovs_dp_vport_init(dp);
+ if (err)
+ goto err_destroy_stats;
err = ovs_meters_init(dp);
if (err)
- goto err_destroy_ports_array;
+ goto err_destroy_ports;
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1656,6 +1673,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_reset_user_features(skb, info);
}
+ ovs_unlock();
goto err_destroy_meters;
}
@@ -1672,17 +1690,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
return 0;
err_destroy_meters:
- ovs_unlock();
ovs_meters_exit(dp);
-err_destroy_ports_array:
+err_destroy_ports:
kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
free_percpu(dp->stats_percpu);
err_destroy_table:
ovs_flow_tbl_destroy(&dp->table);
-err_free_dp:
+err_destroy_dp:
kfree(dp);
-err_free_reply:
+err_destroy_reply:
kfree_skb(reply);
err:
return err;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 38147e6a20f5..9d375e74b607 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -637,27 +637,35 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
} else if (eth_p_mpls(key->eth.type)) {
- size_t stack_len = MPLS_HLEN;
+ u8 label_count = 1;
+ memset(&key->mpls, 0, sizeof(key->mpls));
skb_set_inner_network_header(skb, skb->mac_len);
while (1) {
__be32 lse;
- error = check_header(skb, skb->mac_len + stack_len);
+ error = check_header(skb, skb->mac_len +
+ label_count * MPLS_HLEN);
if (unlikely(error))
return 0;
memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
- if (stack_len == MPLS_HLEN)
- memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
+ if (label_count <= MPLS_LABEL_DEPTH)
+ memcpy(&key->mpls.lse[label_count - 1], &lse,
+ MPLS_HLEN);
- skb_set_inner_network_header(skb, skb->mac_len + stack_len);
+ skb_set_inner_network_header(skb, skb->mac_len +
+ label_count * MPLS_HLEN);
if (lse & htonl(MPLS_LS_S_MASK))
break;
- stack_len += MPLS_HLEN;
+ label_count++;
}
+ if (label_count > MPLS_LABEL_DEPTH)
+ label_count = MPLS_LABEL_DEPTH;
+
+ key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index b830d5ff7af4..fd8ed766bdd1 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -30,6 +30,7 @@ enum sw_flow_mac_proto {
MAC_PROTO_ETHERNET,
};
#define SW_FLOW_KEY_INVALID 0x80
+#define MPLS_LABEL_DEPTH 3
/* Store options at the end of the array if they are less than the
* maximum size. This allows us to get the benefits of variable length
@@ -85,9 +86,6 @@ struct sw_flow_key {
*/
union {
struct {
- __be32 top_lse; /* top label stack entry */
- } mpls;
- struct {
u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
u8 tos; /* IP ToS. */
u8 ttl; /* IP TTL/hop limit. */
@@ -135,6 +133,11 @@ struct sw_flow_key {
} nd;
};
} ipv6;
+ struct {
+ u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
+ __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */
+ } mpls;
+
struct ovs_key_nsh nsh; /* network service header */
};
struct {
@@ -166,7 +169,6 @@ struct sw_flow_key_range {
struct sw_flow_mask {
int ref_count;
struct rcu_head rcu;
- struct list_head list;
struct sw_flow_key_range range;
struct sw_flow_key key;
};
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d7559c64795d..65c2e3458ff5 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -424,7 +424,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
.next = ovs_tunnel_key_lens, },
- [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
+ [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE },
[OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
[OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
@@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
const struct ovs_key_mpls *mpls_key;
+ u32 hdr_len;
+ u32 label_count, label_count_mask, i;
mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
- SW_FLOW_KEY_PUT(match, mpls.top_lse,
- mpls_key->mpls_lse, is_mask);
+ hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
+ label_count = hdr_len / sizeof(struct ovs_key_mpls);
+
+ if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
+ hdr_len % sizeof(struct ovs_key_mpls))
+ return -EINVAL;
+
+ label_count_mask = GENMASK(label_count - 1, 0);
+
+ for (i = 0 ; i < label_count; i++)
+ SW_FLOW_KEY_PUT(match, mpls.lse[i],
+ mpls_key[i].mpls_lse, is_mask);
+
+ SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
+ label_count_mask, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
}
@@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
} else if (eth_p_mpls(swkey->eth.type)) {
+ u8 i, num_labels;
struct ovs_key_mpls *mpls_key;
- nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+ num_labels = hweight_long(output->mpls.num_labels_mask);
+ nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
+ num_labels * sizeof(*mpls_key));
if (!nla)
goto nla_put_failure;
+
mpls_key = nla_data(nla);
- mpls_key->mpls_lse = output->mpls.top_lse;
+ for (i = 0; i < num_labels; i++)
+ mpls_key[i].mpls_lse = output->mpls.lse[i];
}
if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -2406,13 +2426,14 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci, bool log);
+ __be16 eth_type, __be16 vlan_tci,
+ u32 mpls_label_count, bool log);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- bool log, bool last)
+ u32 mpls_label_count, bool log, bool last)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@@ -2463,7 +2484,7 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
return err;
err = __ovs_nla_copy_actions(net, actions, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2478,7 +2499,7 @@ static int validate_and_copy_clone(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- bool log, bool last)
+ u32 mpls_label_count, bool log, bool last)
{
int start, err;
u32 exec;
@@ -2498,7 +2519,7 @@ static int validate_and_copy_clone(struct net *net,
return err;
err = __ovs_nla_copy_actions(net, attr, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2864,6 +2885,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
+ u32 mpls_label_count,
bool log, bool last)
{
const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
@@ -2912,7 +2934,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2925,7 +2947,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2952,7 +2974,8 @@ static int copy_action(const struct nlattr *from,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci, bool log)
+ __be16 eth_type, __be16 vlan_tci,
+ u32 mpls_label_count, bool log)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
@@ -3065,25 +3088,36 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
!eth_p_mpls(eth_type)))
return -EINVAL;
eth_type = mpls->mpls_ethertype;
+ mpls_label_count++;
break;
}
- case OVS_ACTION_ATTR_POP_MPLS:
+ case OVS_ACTION_ATTR_POP_MPLS: {
+ __be16 proto;
if (vlan_tci & htons(VLAN_CFI_MASK) ||
!eth_p_mpls(eth_type))
return -EINVAL;
- /* Disallow subsequent L2.5+ set and mpls_pop actions
- * as there is no check here to ensure that the new
- * eth_type is valid and thus set actions could
- * write off the end of the packet or otherwise
- * corrupt it.
+ /* Disallow subsequent L2.5+ set actions and mpls_pop
+ * actions once the last MPLS label in the packet is
+ * is popped as there is no check here to ensure that
+ * the new eth type is valid and thus set actions could
+ * write off the end of the packet or otherwise corrupt
+ * it.
*
* Support for these actions is planned using packet
* recirculation.
*/
- eth_type = htons(0);
+ proto = nla_get_be16(a);
+ mpls_label_count--;
+
+ if (!eth_p_mpls(proto) || !mpls_label_count)
+ eth_type = htons(0);
+ else
+ eth_type = proto;
+
break;
+ }
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa,
@@ -3106,6 +3140,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_sample(net, a, key, sfa,
eth_type, vlan_tci,
+ mpls_label_count,
log, last);
if (err)
return err;
@@ -3176,6 +3211,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_clone(net, a, key, sfa,
eth_type, vlan_tci,
+ mpls_label_count,
log, last);
if (err)
return err;
@@ -3188,8 +3224,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_check_pkt_len(net, a, key, sfa,
eth_type,
- vlan_tci, log,
- last);
+ vlan_tci,
+ mpls_label_count,
+ log, last);
if (err)
return err;
skip_copy = true;
@@ -3219,14 +3256,18 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
struct sw_flow_actions **sfa, bool log)
{
int err;
+ u32 mpls_label_count = 0;
*sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
+ if (eth_p_mpls(key->eth.type))
+ mpls_label_count = hweight_long(key->mpls.num_labels_mask);
+
(*sfa)->orig_len = nla_len(attr);
err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
- key->eth.vlan.tci, log);
+ key->eth.vlan.tci, mpls_label_count, log);
if (err)
ovs_nla_free_flow_actions(*sfa);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index cf3582c5ed70..5904e93e5765 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -34,8 +34,13 @@
#include <net/ndisc.h>
#define TBL_MIN_BUCKETS 1024
+#define MASK_ARRAY_SIZE_MIN 16
#define REHASH_INTERVAL (10 * 60 * HZ)
+#define MC_HASH_SHIFT 8
+#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT)
+#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
+
static struct kmem_cache *flow_cache;
struct kmem_cache *flow_stats_cache __read_mostly;
@@ -164,14 +169,133 @@ static struct table_instance *table_instance_alloc(int new_size)
return ti;
}
+static struct mask_array *tbl_mask_array_alloc(int size)
+{
+ struct mask_array *new;
+
+ size = max(MASK_ARRAY_SIZE_MIN, size);
+ new = kzalloc(sizeof(struct mask_array) +
+ sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ new->count = 0;
+ new->max = size;
+
+ return new;
+}
+
+static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
+{
+ struct mask_array *old;
+ struct mask_array *new;
+
+ new = tbl_mask_array_alloc(size);
+ if (!new)
+ return -ENOMEM;
+
+ old = ovsl_dereference(tbl->mask_array);
+ if (old) {
+ int i;
+
+ for (i = 0; i < old->max; i++) {
+ if (ovsl_dereference(old->masks[i]))
+ new->masks[new->count++] = old->masks[i];
+ }
+ }
+
+ rcu_assign_pointer(tbl->mask_array, new);
+ kfree_rcu(old, rcu);
+
+ return 0;
+}
+
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+ struct sw_flow_mask *new)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int err, ma_count = READ_ONCE(ma->count);
+
+ if (ma_count >= ma->max) {
+ err = tbl_mask_array_realloc(tbl, ma->max +
+ MASK_ARRAY_SIZE_MIN);
+ if (err)
+ return err;
+
+ ma = ovsl_dereference(tbl->mask_array);
+ }
+
+ BUG_ON(ovsl_dereference(ma->masks[ma_count]));
+
+ rcu_assign_pointer(ma->masks[ma_count], new);
+ WRITE_ONCE(ma->count, ma_count +1);
+
+ return 0;
+}
+
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+ struct sw_flow_mask *mask)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int i, ma_count = READ_ONCE(ma->count);
+
+ /* Remove the deleted mask pointers from the array */
+ for (i = 0; i < ma_count; i++) {
+ if (mask == ovsl_dereference(ma->masks[i]))
+ goto found;
+ }
+
+ BUG();
+ return;
+
+found:
+ WRITE_ONCE(ma->count, ma_count -1);
+
+ rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
+ RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
+
+ kfree_rcu(mask, rcu);
+
+ /* Shrink the mask array if necessary. */
+ if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+ ma_count <= (ma->max / 3))
+ tbl_mask_array_realloc(tbl, ma->max / 2);
+}
+
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+ if (mask) {
+ /* ovs-lock is required to protect mask-refcount and
+ * mask list.
+ */
+ ASSERT_OVSL();
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count)
+ tbl_mask_array_del_mask(tbl, mask);
+ }
+}
+
int ovs_flow_tbl_init(struct flow_table *table)
{
struct table_instance *ti, *ufid_ti;
+ struct mask_array *ma;
- ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
+ MC_HASH_ENTRIES,
+ __alignof__(struct mask_cache_entry));
+ if (!table->mask_cache)
+ return -ENOMEM;
+ ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
+ if (!ma)
+ goto free_mask_cache;
+
+ ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti)
- return -ENOMEM;
+ goto free_mask_array;
ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ufid_ti)
@@ -179,7 +303,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
rcu_assign_pointer(table->ti, ti);
rcu_assign_pointer(table->ufid_ti, ufid_ti);
- INIT_LIST_HEAD(&table->mask_list);
+ rcu_assign_pointer(table->mask_array, ma);
table->last_rehash = jiffies;
table->count = 0;
table->ufid_count = 0;
@@ -187,6 +311,10 @@ int ovs_flow_tbl_init(struct flow_table *table)
free_ti:
__table_instance_destroy(ti);
+free_mask_array:
+ kfree(ma);
+free_mask_cache:
+ free_percpu(table->mask_cache);
return -ENOMEM;
}
@@ -197,7 +325,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
__table_instance_destroy(ti);
}
-static void table_instance_destroy(struct table_instance *ti,
+static void table_instance_flow_free(struct flow_table *table,
+ struct table_instance *ti,
+ struct table_instance *ufid_ti,
+ struct sw_flow *flow,
+ bool count)
+{
+ hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
+ if (count)
+ table->count--;
+
+ if (ovs_identifier_is_ufid(&flow->id)) {
+ hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+
+ if (count)
+ table->ufid_count--;
+ }
+
+ flow_mask_remove(table, flow->mask);
+}
+
+static void table_instance_destroy(struct flow_table *table,
+ struct table_instance *ti,
struct table_instance *ufid_ti,
bool deferred)
{
@@ -214,13 +363,12 @@ static void table_instance_destroy(struct table_instance *ti,
struct sw_flow *flow;
struct hlist_head *head = &ti->buckets[i];
struct hlist_node *n;
- int ver = ti->node_ver;
- int ufid_ver = ufid_ti->node_ver;
- hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
- hlist_del_rcu(&flow->flow_table.node[ver]);
- if (ovs_identifier_is_ufid(&flow->id))
- hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
+ hlist_for_each_entry_safe(flow, n, head,
+ flow_table.node[ti->node_ver]) {
+
+ table_instance_flow_free(table, ti, ufid_ti,
+ flow, false);
ovs_flow_free(flow, deferred);
}
}
@@ -243,7 +391,9 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
struct table_instance *ti = rcu_dereference_raw(table->ti);
struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
- table_instance_destroy(ti, ufid_ti, false);
+ free_percpu(table->mask_cache);
+ kfree_rcu(rcu_dereference_raw(table->mask_array), rcu);
+ table_instance_destroy(table, ti, ufid_ti, false);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -359,7 +509,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
flow_table->count = 0;
flow_table->ufid_count = 0;
- table_instance_destroy(old_ti, old_ufid_ti, true);
+ table_instance_destroy(flow_table, old_ti, old_ufid_ti, true);
return 0;
err_free_ti:
@@ -370,13 +520,10 @@ err_free_ti:
static u32 flow_hash(const struct sw_flow_key *key,
const struct sw_flow_key_range *range)
{
- int key_start = range->start;
- int key_end = range->end;
- const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
- int hash_u32s = (key_end - key_start) >> 2;
+ const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
/* Make sure number of hash bytes are multiple of u32. */
- BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+ int hash_u32s = range_n_bytes(range) >> 2;
return jhash2(hash_key, hash_u32s, 0);
}
@@ -425,7 +572,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
const struct sw_flow_key *unmasked,
- const struct sw_flow_mask *mask)
+ const struct sw_flow_mask *mask,
+ u32 *n_mask_hit)
{
struct sw_flow *flow;
struct hlist_head *head;
@@ -435,6 +583,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
ovs_flow_mask_key(&masked_key, unmasked, false, mask);
hash = flow_hash(&masked_key, &mask->range);
head = find_bucket(ti, hash);
+ (*n_mask_hit)++;
+
hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range))
@@ -443,46 +593,147 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
return NULL;
}
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
- const struct sw_flow_key *key,
- u32 *n_mask_hit)
+/* Flow lookup does full lookup on flow table. It starts with
+ * mask from index passed in *index.
+ */
+static struct sw_flow *flow_lookup(struct flow_table *tbl,
+ struct table_instance *ti,
+ struct mask_array *ma,
+ const struct sw_flow_key *key,
+ u32 *n_mask_hit,
+ u32 *index)
{
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct sw_flow *flow;
struct sw_flow_mask *mask;
+ int i;
+
+ if (likely(*index < ma->max)) {
+ mask = rcu_dereference_ovsl(ma->masks[*index]);
+ if (mask) {
+ flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+ if (flow)
+ return flow;
+ }
+ }
+
+ for (i = 0; i < ma->max; i++) {
+
+ if (i == *index)
+ continue;
+
+ mask = rcu_dereference_ovsl(ma->masks[i]);
+ if (unlikely(!mask))
+ break;
+
+ flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+ if (flow) { /* Found */
+ *index = i;
+ return flow;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * mask_cache maps flow to probable mask. This cache is not tightly
+ * coupled cache, It means updates to mask list can result in inconsistent
+ * cache entry in mask cache.
+ * This is per cpu cache and is divided in MC_HASH_SEGS segments.
+ * In case of a hash collision the entry is hashed in next segment.
+ * */
+struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
+ const struct sw_flow_key *key,
+ u32 skb_hash,
+ u32 *n_mask_hit)
+{
+ struct mask_array *ma = rcu_dereference(tbl->mask_array);
+ struct table_instance *ti = rcu_dereference(tbl->ti);
+ struct mask_cache_entry *entries, *ce;
struct sw_flow *flow;
+ u32 hash;
+ int seg;
*n_mask_hit = 0;
- list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
- (*n_mask_hit)++;
- flow = masked_flow_lookup(ti, key, mask);
- if (flow) /* Found */
+ if (unlikely(!skb_hash)) {
+ u32 mask_index = 0;
+
+ return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
+ }
+
+ /* Pre and post recirulation flows usually have the same skb_hash
+ * value. To avoid hash collisions, rehash the 'skb_hash' with
+ * 'recirc_id'. */
+ if (key->recirc_id)
+ skb_hash = jhash_1word(skb_hash, key->recirc_id);
+
+ ce = NULL;
+ hash = skb_hash;
+ entries = this_cpu_ptr(tbl->mask_cache);
+
+ /* Find the cache entry 'ce' to operate on. */
+ for (seg = 0; seg < MC_HASH_SEGS; seg++) {
+ int index = hash & (MC_HASH_ENTRIES - 1);
+ struct mask_cache_entry *e;
+
+ e = &entries[index];
+ if (e->skb_hash == skb_hash) {
+ flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
+ &e->mask_index);
+ if (!flow)
+ e->skb_hash = 0;
return flow;
+ }
+
+ if (!ce || e->skb_hash < ce->skb_hash)
+ ce = e; /* A better replacement cache candidate. */
+
+ hash >>= MC_HASH_SHIFT;
}
- return NULL;
+
+ /* Cache miss, do full lookup. */
+ flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
+ if (flow)
+ ce->skb_hash = skb_hash;
+
+ return flow;
}
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
const struct sw_flow_key *key)
{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
u32 __always_unused n_mask_hit;
+ u32 index = 0;
- return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
+ return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
}
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
const struct sw_flow_match *match)
{
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
- struct sw_flow_mask *mask;
- struct sw_flow *flow;
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int i;
/* Always called under ovs-mutex. */
- list_for_each_entry(mask, &tbl->mask_list, list) {
- flow = masked_flow_lookup(ti, match->key, mask);
+ for (i = 0; i < ma->max; i++) {
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ u32 __always_unused n_mask_hit;
+ struct sw_flow_mask *mask;
+ struct sw_flow *flow;
+
+ mask = ovsl_dereference(ma->masks[i]);
+ if (!mask)
+ continue;
+
+ flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
if (flow && ovs_identifier_is_key(&flow->id) &&
- ovs_flow_cmp_unmasked_key(flow, match))
+ ovs_flow_cmp_unmasked_key(flow, match)) {
return flow;
+ }
}
+
return NULL;
}
@@ -528,13 +779,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
- struct sw_flow_mask *mask;
- int num = 0;
-
- list_for_each_entry(mask, &table->mask_list, list)
- num++;
-
- return num;
+ struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
+ return READ_ONCE(ma->count);
}
static struct table_instance *table_instance_expand(struct table_instance *ti,
@@ -543,24 +789,6 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}
-/* Remove 'mask' from the mask list, if it is not needed any more. */
-static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
-{
- if (mask) {
- /* ovs-lock is required to protect mask-refcount and
- * mask list.
- */
- ASSERT_OVSL();
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count) {
- list_del_rcu(&mask->list);
- kfree_rcu(mask, rcu);
- }
- }
-}
-
/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
@@ -568,17 +796,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
BUG_ON(table->count == 0);
- hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
- table->count--;
- if (ovs_identifier_is_ufid(&flow->id)) {
- hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
- table->ufid_count--;
- }
-
- /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
- * accessible as long as the RCU read lock is held.
- */
- flow_mask_remove(table, flow->mask);
+ table_instance_flow_free(table, ti, ufid_ti, flow, true);
}
static struct sw_flow_mask *mask_alloc(void)
@@ -606,13 +824,16 @@ static bool mask_equal(const struct sw_flow_mask *a,
static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
const struct sw_flow_mask *mask)
{
- struct list_head *ml;
+ struct mask_array *ma;
+ int i;
+
+ ma = ovsl_dereference(tbl->mask_array);
+ for (i = 0; i < ma->max; i++) {
+ struct sw_flow_mask *t;
+ t = ovsl_dereference(ma->masks[i]);
- list_for_each(ml, &tbl->mask_list) {
- struct sw_flow_mask *m;
- m = container_of(ml, struct sw_flow_mask, list);
- if (mask_equal(mask, m))
- return m;
+ if (t && mask_equal(mask, t))
+ return t;
}
return NULL;
@@ -623,6 +844,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
const struct sw_flow_mask *new)
{
struct sw_flow_mask *mask;
+
mask = flow_mask_find(tbl, new);
if (!mask) {
/* Allocate a new mask if none exsits. */
@@ -631,7 +853,12 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
return -ENOMEM;
mask->key = new->key;
mask->range = new->range;
- list_add_rcu(&mask->list, &tbl->mask_list);
+
+ /* Add mask to mask-list. */
+ if (tbl_mask_array_add_mask(tbl, mask)) {
+ kfree(mask);
+ return -ENOMEM;
+ }
} else {
BUG_ON(!mask->ref_count);
mask->ref_count++;
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index bc52045b63ff..8a5cea6ae111 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -22,6 +22,17 @@
#include "flow.h"
+struct mask_cache_entry {
+ u32 skb_hash;
+ u32 mask_index;
+};
+
+struct mask_array {
+ struct rcu_head rcu;
+ int count, max;
+ struct sw_flow_mask __rcu *masks[];
+};
+
struct table_instance {
struct hlist_head *buckets;
unsigned int n_buckets;
@@ -34,7 +45,8 @@ struct table_instance {
struct flow_table {
struct table_instance __rcu *ti;
struct table_instance __rcu *ufid_ti;
- struct list_head mask_list;
+ struct mask_cache_entry __percpu *mask_cache;
+ struct mask_array __rcu *mask_array;
unsigned long last_rehash;
unsigned int count;
unsigned int ufid_count;
@@ -60,8 +72,9 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table);
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
u32 *bucket, u32 *idx);
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
- const struct sw_flow_key *,
- u32 *n_mask_hit);
+ const struct sw_flow_key *,
+ u32 skb_hash,
+ u32 *n_mask_hit);
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 3fc38d16c456..5da9392b03d6 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -403,8 +403,9 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
ids = rcu_dereference(vport->upcall_portids);
- if (ids->n_ids == 1 && ids->ids[0] == 0)
- return 0;
+ /* If there is only one portid, select it in the fast-path. */
+ if (ids->n_ids == 1)
+ return ids->ids[0];
hash = skb_get_hash(skb);
ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 82a50e850245..53c1d41fb1c9 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1295,15 +1295,21 @@ static void packet_sock_destruct(struct sock *sk)
static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
{
- u32 rxhash;
+ u32 *history = po->rollover->history;
+ u32 victim, rxhash;
int i, count = 0;
rxhash = skb_get_hash(skb);
for (i = 0; i < ROLLOVER_HLEN; i++)
- if (po->rollover->history[i] == rxhash)
+ if (READ_ONCE(history[i]) == rxhash)
count++;
- po->rollover->history[prandom_u32() % ROLLOVER_HLEN] = rxhash;
+ victim = prandom_u32() % ROLLOVER_HLEN;
+
+ /* Avoid dirtying the cache line if possible */
+ if (READ_ONCE(history[victim]) != rxhash)
+ WRITE_ONCE(history[victim], rxhash);
+
return count > (ROLLOVER_HLEN >> 1);
}
diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c
index e35869e81766..15ce9b642b25 100644
--- a/net/qrtr/tun.c
+++ b/net/qrtr/tun.c
@@ -111,15 +111,11 @@ static __poll_t qrtr_tun_poll(struct file *filp, poll_table *wait)
static int qrtr_tun_release(struct inode *inode, struct file *filp)
{
struct qrtr_tun *tun = filp->private_data;
- struct sk_buff *skb;
qrtr_endpoint_unregister(&tun->ep);
/* Discard all SKBs */
- while (!skb_queue_empty(&tun->queue)) {
- skb = skb_dequeue(&tun->queue);
- kfree_skb(skb);
- }
+ skb_queue_purge(&tun->queue);
kfree(tun);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 9de2ae22d583..3fd5f40189bd 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -30,6 +30,7 @@
* SOFTWARE.
*
*/
+#include <linux/dmapool.h>
#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/if.h>
@@ -107,6 +108,7 @@ static void rds_ib_dev_free(struct work_struct *work)
rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool);
if (rds_ibdev->pd)
ib_dealloc_pd(rds_ibdev->pd);
+ dma_pool_destroy(rds_ibdev->rid_hdrs_pool);
list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
list_del(&i_ipaddr->list);
@@ -182,6 +184,12 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->pd = NULL;
goto put_dev;
}
+ rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name,
+ device->dma_device,
+ sizeof(struct rds_header),
+ L1_CACHE_BYTES, 0);
+ if (!rds_ibdev->rid_hdrs_pool)
+ goto put_dev;
rds_ibdev->mr_1m_pool =
rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
diff --git a/net/rds/ib.h b/net/rds/ib.h
index f2b558e8b5ea..6e6f24753998 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -165,8 +165,8 @@ struct rds_ib_connection {
/* tx */
struct rds_ib_work_ring i_send_ring;
struct rm_data_op *i_data_op;
- struct rds_header *i_send_hdrs;
- dma_addr_t i_send_hdrs_dma;
+ struct rds_header **i_send_hdrs;
+ dma_addr_t *i_send_hdrs_dma;
struct rds_ib_send_work *i_sends;
atomic_t i_signaled_sends;
@@ -175,8 +175,8 @@ struct rds_ib_connection {
struct rds_ib_work_ring i_recv_ring;
struct rds_ib_incoming *i_ibinc;
u32 i_recv_data_rem;
- struct rds_header *i_recv_hdrs;
- dma_addr_t i_recv_hdrs_dma;
+ struct rds_header **i_recv_hdrs;
+ dma_addr_t *i_recv_hdrs_dma;
struct rds_ib_recv_work *i_recvs;
u64 i_ack_recv; /* last ACK received */
struct rds_ib_refill_cache i_cache_incs;
@@ -246,6 +246,7 @@ struct rds_ib_device {
struct list_head conn_list;
struct ib_device *dev;
struct ib_pd *pd;
+ struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */
bool use_fastreg;
unsigned int max_mrs;
@@ -381,7 +382,11 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6);
void rds_ib_cm_connect_complete(struct rds_connection *conn,
struct rdma_cm_event *event);
-
+struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
+ struct dma_pool *pool,
+ dma_addr_t **dma_addrs, u32 num_hdrs);
+void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
+ dma_addr_t *dma_addrs, u32 num_hdrs);
#define rds_ib_conn_error(conn, fmt...) \
__rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 233f1368162b..6b345c858dba 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -30,6 +30,7 @@
* SOFTWARE.
*
*/
+#include <linux/dmapool.h>
#include <linux/kernel.h>
#include <linux/in.h>
#include <linux/slab.h>
@@ -439,6 +440,68 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index)
rds_ibdev->vector_load[index]--;
}
+/* Allocate DMA coherent memory to be used to store struct rds_header for
+ * sending/receiving packets. The pointers to the DMA memory and the
+ * associated DMA addresses are stored in two arrays.
+ *
+ * @ibdev: the IB device
+ * @pool: the DMA memory pool
+ * @dma_addrs: pointer to the array for storing DMA addresses
+ * @num_hdrs: number of headers to allocate
+ *
+ * It returns the pointer to the array storing the DMA memory pointers. On
+ * error, NULL pointer is returned.
+ */
+struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
+ struct dma_pool *pool,
+ dma_addr_t **dma_addrs, u32 num_hdrs)
+{
+ struct rds_header **hdrs;
+ dma_addr_t *hdr_daddrs;
+ u32 i;
+
+ hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL,
+ ibdev_to_node(ibdev));
+ if (!hdrs)
+ return NULL;
+
+ hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL,
+ ibdev_to_node(ibdev));
+ if (!hdr_daddrs) {
+ kvfree(hdrs);
+ return NULL;
+ }
+
+ for (i = 0; i < num_hdrs; i++) {
+ hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]);
+ if (!hdrs[i]) {
+ rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i);
+ return NULL;
+ }
+ }
+
+ *dma_addrs = hdr_daddrs;
+ return hdrs;
+}
+
+/* Free the DMA memory used to store struct rds_header.
+ *
+ * @pool: the DMA memory pool
+ * @hdrs: pointer to the array storing DMA memory pointers
+ * @dma_addrs: pointer to the array storing DMA addresses
+ * @num_hdars: number of headers to free.
+ */
+void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
+ dma_addr_t *dma_addrs, u32 num_hdrs)
+{
+ u32 i;
+
+ for (i = 0; i < num_hdrs; i++)
+ dma_pool_free(pool, hdrs[i], dma_addrs[i]);
+ kvfree(hdrs);
+ kvfree(dma_addrs);
+}
+
/*
* This needs to be very careful to not leave IS_ERR pointers around for
* cleanup to trip over.
@@ -451,6 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
struct ib_cq_init_attr cq_attr = {};
struct rds_ib_device *rds_ibdev;
int ret, fr_queue_space;
+ struct dma_pool *pool;
/*
* It's normal to see a null device if an incoming connection races
@@ -541,31 +605,28 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
goto recv_cq_out;
}
- ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
- ic->i_send_ring.w_nr *
- sizeof(struct rds_header),
- &ic->i_send_hdrs_dma, GFP_KERNEL);
+ pool = rds_ibdev->rid_hdrs_pool;
+ ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma,
+ ic->i_send_ring.w_nr);
if (!ic->i_send_hdrs) {
ret = -ENOMEM;
- rdsdebug("ib_dma_alloc_coherent send failed\n");
+ rdsdebug("DMA send hdrs alloc failed\n");
goto qp_out;
}
- ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
- ic->i_recv_ring.w_nr *
- sizeof(struct rds_header),
- &ic->i_recv_hdrs_dma, GFP_KERNEL);
+ ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma,
+ ic->i_recv_ring.w_nr);
if (!ic->i_recv_hdrs) {
ret = -ENOMEM;
- rdsdebug("ib_dma_alloc_coherent recv failed\n");
+ rdsdebug("DMA recv hdrs alloc failed\n");
goto send_hdrs_dma_out;
}
- ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
- &ic->i_ack_dma, GFP_KERNEL);
+ ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL,
+ &ic->i_ack_dma);
if (!ic->i_ack) {
ret = -ENOMEM;
- rdsdebug("ib_dma_alloc_coherent ack failed\n");
+ rdsdebug("DMA ack header alloc failed\n");
goto recv_hdrs_dma_out;
}
@@ -596,17 +657,23 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
sends_out:
vfree(ic->i_sends);
+
ack_dma_out:
- ib_dma_free_coherent(dev, sizeof(struct rds_header),
- ic->i_ack, ic->i_ack_dma);
+ dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
+ ic->i_ack = NULL;
+
recv_hdrs_dma_out:
- ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
- sizeof(struct rds_header),
- ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
+ rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma,
+ ic->i_recv_ring.w_nr);
+ ic->i_recv_hdrs = NULL;
+ ic->i_recv_hdrs_dma = NULL;
+
send_hdrs_dma_out:
- ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
- sizeof(struct rds_header),
- ic->i_send_hdrs, ic->i_send_hdrs_dma);
+ rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma,
+ ic->i_send_ring.w_nr);
+ ic->i_send_hdrs = NULL;
+ ic->i_send_hdrs_dma = NULL;
+
qp_out:
rdma_destroy_qp(ic->i_cm_id);
recv_cq_out:
@@ -984,8 +1051,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_cm_id ? ic->i_cm_id->qp : NULL);
if (ic->i_cm_id) {
- struct ib_device *dev = ic->i_cm_id->device;
-
rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
err = rdma_disconnect(ic->i_cm_id);
if (err) {
@@ -1035,24 +1100,39 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ib_destroy_cq(ic->i_recv_cq);
}
- /* then free the resources that ib callbacks use */
- if (ic->i_send_hdrs)
- ib_dma_free_coherent(dev,
- ic->i_send_ring.w_nr *
- sizeof(struct rds_header),
- ic->i_send_hdrs,
- ic->i_send_hdrs_dma);
-
- if (ic->i_recv_hdrs)
- ib_dma_free_coherent(dev,
- ic->i_recv_ring.w_nr *
- sizeof(struct rds_header),
- ic->i_recv_hdrs,
- ic->i_recv_hdrs_dma);
-
- if (ic->i_ack)
- ib_dma_free_coherent(dev, sizeof(struct rds_header),
- ic->i_ack, ic->i_ack_dma);
+ if (ic->rds_ibdev) {
+ struct dma_pool *pool;
+
+ pool = ic->rds_ibdev->rid_hdrs_pool;
+
+ /* then free the resources that ib callbacks use */
+ if (ic->i_send_hdrs) {
+ rds_dma_hdrs_free(pool, ic->i_send_hdrs,
+ ic->i_send_hdrs_dma,
+ ic->i_send_ring.w_nr);
+ ic->i_send_hdrs = NULL;
+ ic->i_send_hdrs_dma = NULL;
+ }
+
+ if (ic->i_recv_hdrs) {
+ rds_dma_hdrs_free(pool, ic->i_recv_hdrs,
+ ic->i_recv_hdrs_dma,
+ ic->i_recv_ring.w_nr);
+ ic->i_recv_hdrs = NULL;
+ ic->i_recv_hdrs_dma = NULL;
+ }
+
+ if (ic->i_ack) {
+ dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
+ ic->i_ack = NULL;
+ }
+ } else {
+ WARN_ON(ic->i_send_hdrs);
+ WARN_ON(ic->i_send_hdrs_dma);
+ WARN_ON(ic->i_recv_hdrs);
+ WARN_ON(ic->i_recv_hdrs_dma);
+ WARN_ON(ic->i_ack);
+ }
if (ic->i_sends)
rds_ib_send_clear_ring(ic);
@@ -1071,9 +1151,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_pd = NULL;
ic->i_send_cq = NULL;
ic->i_recv_cq = NULL;
- ic->i_send_hdrs = NULL;
- ic->i_recv_hdrs = NULL;
- ic->i_ack = NULL;
}
BUG_ON(ic->rds_ibdev);
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index a0f99bbf362c..694d411dc72f 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -61,7 +61,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
recv->r_wr.num_sge = RDS_IB_RECV_SGE;
sge = &recv->r_sge[0];
- sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
+ sge->addr = ic->i_recv_hdrs_dma[i];
sge->length = sizeof(struct rds_header);
sge->lkey = ic->i_pd->local_dma_lkey;
@@ -343,7 +343,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
WARN_ON(ret != 1);
sge = &recv->r_sge[0];
- sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
+ sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
sge->length = sizeof(struct rds_header);
sge = &recv->r_sge[1];
@@ -861,7 +861,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
}
data_len -= sizeof(struct rds_header);
- ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
+ ihdr = ic->i_recv_hdrs[recv - ic->i_recvs];
/* Validate the checksum. */
if (!rds_message_verify_checksum(ihdr)) {
@@ -993,10 +993,11 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
} else {
/* We expect errors as the qp is drained during shutdown */
if (rds_conn_up(conn) || rds_conn_connecting(conn))
- rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n",
+ rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
&conn->c_laddr, &conn->c_faddr,
conn->c_tos, wc->status,
- ib_wc_status_msg(wc->status));
+ ib_wc_status_msg(wc->status),
+ wc->vendor_err);
}
/* rds_ib_process_recv() doesn't always consume the frag, and
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index dfe6237dafe2..d1cc1d7778d8 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -201,7 +201,8 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
send->s_wr.ex.imm_data = 0;
sge = &send->s_sge[0];
- sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
+ sge->addr = ic->i_send_hdrs_dma[i];
+
sge->length = sizeof(struct rds_header);
sge->lkey = ic->i_pd->local_dma_lkey;
@@ -300,10 +301,10 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
/* We expect errors as the qp is drained during shutdown */
if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
- rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n",
+ rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
&conn->c_laddr, &conn->c_faddr,
conn->c_tos, wc->status,
- ib_wc_status_msg(wc->status));
+ ib_wc_status_msg(wc->status), wc->vendor_err);
}
}
@@ -631,11 +632,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
send->s_queued = jiffies;
send->s_op = NULL;
- send->s_sge[0].addr = ic->i_send_hdrs_dma
- + (pos * sizeof(struct rds_header));
+ send->s_sge[0].addr = ic->i_send_hdrs_dma[pos];
+
send->s_sge[0].length = sizeof(struct rds_header);
- memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
+ memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr,
+ sizeof(struct rds_header));
+
/* Set up the data, if present */
if (i < work_alloc
@@ -674,7 +677,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
&send->s_wr, send->s_wr.num_sge, send->s_wr.next);
if (ic->i_flowctl && adv_credits) {
- struct rds_header *hdr = &ic->i_send_hdrs[pos];
+ struct rds_header *hdr = ic->i_send_hdrs[pos];
/* add credit and redo the header checksum */
hdr->h_credit = adv_credits;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 6a0df7c8a939..46b8ff24020d 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -906,7 +906,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
/* Now attach up the new socket */
skb->sk = NULL;
kfree_skb(skb);
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
out_release:
release_sock(sk);
@@ -1011,7 +1011,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
make_rose->va = 0;
make_rose->vr = 0;
make_rose->vl = 0;
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
rose_insert_socket(make);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 64830d8c1fdb..452163eadb98 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -209,6 +209,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx,
*/
struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
{
+ const void *here = __builtin_return_address(0);
struct rxrpc_peer *peer;
_enter("");
@@ -230,6 +231,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
peer->cong_cwnd = 3;
else
peer->cong_cwnd = 4;
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here);
}
_leave(" = %p", peer);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 69d4676a402f..bda1ba25c59e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -188,6 +188,8 @@ static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
+ nla_total_size(0) /* TCA_ACT_STATS nested */
/* TCA_STATS_BASIC */
+ nla_total_size_64bit(sizeof(struct gnet_stats_basic))
+ /* TCA_STATS_PKT64 */
+ + nla_total_size_64bit(sizeof(u64))
/* TCA_STATS_QUEUE */
+ nla_total_size_64bit(sizeof(struct gnet_stats_queue))
+ nla_total_size(0) /* TCA_OPTIONS nested */
@@ -399,7 +401,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
- int bind, bool cpustats)
+ int bind, bool cpustats, u32 flags)
{
struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
struct tcf_idrinfo *idrinfo = tn->idrinfo;
@@ -427,6 +429,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
p->tcfa_tm.install = jiffies;
p->tcfa_tm.lastuse = jiffies;
p->tcfa_tm.firstuse = 0;
+ p->tcfa_flags = flags;
if (est) {
err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
&p->tcfa_rate_est,
@@ -451,6 +454,17 @@ err1:
}
EXPORT_SYMBOL(tcf_idr_create);
+int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
+ struct nlattr *est, struct tc_action **a,
+ const struct tc_action_ops *ops, int bind,
+ u32 flags)
+{
+ /* Set cpustats according to actions flags. */
+ return tcf_idr_create(tn, index, est, a, ops, bind,
+ !(flags & TCA_ACT_FLAGS_NO_PERCPU_STATS), flags);
+}
+EXPORT_SYMBOL(tcf_idr_create_from_flags);
+
void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
@@ -773,6 +787,14 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
}
rcu_read_unlock();
+ if (a->tcfa_flags) {
+ struct nla_bitfield32 flags = { a->tcfa_flags,
+ a->tcfa_flags, };
+
+ if (nla_put(skb, TCA_ACT_FLAGS, sizeof(flags), &flags))
+ goto nla_put_failure;
+ }
+
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
@@ -831,12 +853,15 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
return c;
}
+static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS;
static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
[TCA_ACT_KIND] = { .type = NLA_STRING },
[TCA_ACT_INDEX] = { .type = NLA_U32 },
[TCA_ACT_COOKIE] = { .type = NLA_BINARY,
.len = TC_COOKIE_MAX_SIZE },
[TCA_ACT_OPTIONS] = { .type = NLA_NESTED },
+ [TCA_ACT_FLAGS] = { .type = NLA_BITFIELD32,
+ .validation_data = &tca_act_flags_allowed },
};
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
@@ -845,6 +870,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
bool rtnl_held,
struct netlink_ext_ack *extack)
{
+ struct nla_bitfield32 flags = { 0, 0 };
struct tc_action *a;
struct tc_action_ops *a_o;
struct tc_cookie *cookie = NULL;
@@ -876,6 +902,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
goto err_out;
}
}
+ if (tb[TCA_ACT_FLAGS])
+ flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
} else {
if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
NL_SET_ERR_MSG(extack, "TC action name too long");
@@ -914,10 +942,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
/* backward compatibility for policer */
if (name == NULL)
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
- rtnl_held, tp, extack);
+ rtnl_held, tp, flags.value, extack);
else
err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
- tp, extack);
+ tp, flags.value, extack);
if (err < 0)
goto err_mod;
@@ -989,6 +1017,29 @@ err:
return err;
}
+void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
+ bool drop, bool hw)
+{
+ if (a->cpu_bstats) {
+ _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+
+ if (drop)
+ this_cpu_ptr(a->cpu_qstats)->drops += packets;
+
+ if (hw)
+ _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+ bytes, packets);
+ return;
+ }
+
+ _bstats_update(&a->tcfa_bstats, bytes, packets);
+ if (drop)
+ a->tcfa_qstats.drops += packets;
+ if (hw)
+ _bstats_update(&a->tcfa_bstats_hw, bytes, packets);
+}
+EXPORT_SYMBOL(tcf_action_update_stats);
+
int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
int compat_mode)
{
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 04b7bd4ec751..46f47e58b3be 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -275,7 +275,8 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
int replace, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
@@ -303,7 +304,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
ret = tcf_idr_check_alloc(tn, &index, act, bind);
if (!ret) {
ret = tcf_idr_create(tn, index, est, act,
- &act_bpf_ops, bind, true);
+ &act_bpf_ops, bind, true, 0);
if (ret < 0) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2b43cacf82af..43a243081e7d 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -94,7 +94,7 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
@@ -121,7 +121,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
ret = tcf_idr_check_alloc(tn, &index, a, bind);
if (!ret) {
ret = tcf_idr_create(tn, index, est, a,
- &act_connmark_ops, bind, false);
+ &act_connmark_ops, bind, false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d3cfad88dc3a..16e67e1c1db1 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -43,7 +43,7 @@ static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
int bind, bool rtnl_held, struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
struct tcf_csum_params *params_new;
@@ -68,8 +68,8 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_csum_ops, bind, true);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_csum_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -580,7 +580,7 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
params = rcu_dereference_bh(p->params);
tcf_lastuse_update(&p->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
+ tcf_action_update_bstats(&p->common, skb);
action = READ_ONCE(p->tcf_action);
if (unlikely(action == TC_ACT_SHOT))
@@ -624,7 +624,7 @@ out:
return action;
drop:
- qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
+ tcf_action_inc_drop_qstats(&p->common);
action = TC_ACT_SHOT;
goto out;
}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index fcc46025e790..68d6af56b243 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -465,11 +465,11 @@ out_push:
skb_push_rcsum(skb, nh_ofs);
out:
- bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+ tcf_action_update_bstats(&c->common, skb);
return retval;
drop:
- qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
+ tcf_action_inc_drop_qstats(&c->common);
return TC_ACT_SHOT;
}
@@ -656,7 +656,7 @@ static int tcf_ct_fill_params(struct net *net,
static int tcf_ct_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int replace, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ct_net_id);
@@ -688,8 +688,8 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
return err;
if (!err) {
- err = tcf_idr_create(tn, index, est, a,
- &act_ct_ops, bind, true);
+ err = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_ct_ops, bind, flags);
if (err) {
tcf_idr_cleanup(tn, index);
return err;
@@ -905,11 +905,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
{
struct tcf_ct *c = to_ct(a);
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
-
- if (hw)
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
- bytes, packets);
+ tcf_action_update_stats(a, bytes, packets, false, hw);
c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
}
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 0dbcfd1dca7b..b1e601007242 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -153,7 +153,7 @@ static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
@@ -210,7 +210,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
ret = tcf_idr_create(tn, index, est, a,
- &act_ctinfo_ops, bind, false);
+ &act_ctinfo_ops, bind, false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 324f1d1f6d47..416065772719 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -53,7 +53,8 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
struct nlattr *tb[TCA_GACT_MAX + 1];
@@ -98,8 +99,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_gact_ops, bind, true);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_gact_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -161,9 +162,9 @@ static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
action = gact_rand[ptype](gact);
}
#endif
- bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb);
+ tcf_action_update_bstats(&gact->common, skb);
if (action == TC_ACT_SHOT)
- qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats));
+ tcf_action_inc_drop_qstats(&gact->common);
tcf_lastuse_update(&gact->tcf_tm);
@@ -177,15 +178,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
int action = READ_ONCE(gact->tcf_action);
struct tcf_t *tm = &gact->tcf_tm;
- _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes,
- packets);
- if (action == TC_ACT_SHOT)
- this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
-
- if (hw)
- _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats_hw),
- bytes, packets);
-
+ tcf_action_update_stats(a, bytes, packets, action == TC_ACT_SHOT, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3a31e241c647..d562c88cccbe 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -465,7 +465,8 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
@@ -522,7 +523,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a, &act_ife_ops,
- bind, true);
+ bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
kfree(p);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 214a03d405cf..400a2cfe8452 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -95,7 +95,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
const struct tc_action_ops *ops, int ovr, int bind,
- struct tcf_proto *tp)
+ struct tcf_proto *tp, u32 flags)
{
struct tc_action_net *tn = net_generic(net, id);
struct nlattr *tb[TCA_IPT_MAX + 1];
@@ -144,7 +144,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a, ops, bind,
- false);
+ false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -205,19 +205,19 @@ err1:
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
int bind, bool rtnl_held, struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
- bind, tp);
+ bind, tp, flags);
}
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
int bind, bool unlocked, struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
- bind, tp);
+ bind, tp, flags);
}
static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 08923b21e566..b6e1b5bbb4da 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -93,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
@@ -148,8 +148,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
return -EINVAL;
}
- ret = tcf_idr_create(tn, index, est, a,
- &act_mirred_ops, bind, true);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_mirred_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -231,7 +231,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
}
tcf_lastuse_update(&m->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
+ tcf_action_update_bstats(&m->common, skb);
m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
m_eaction = READ_ONCE(m->tcfm_eaction);
@@ -289,8 +289,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
/* let's the caller reinsert the packet, if possible */
if (use_reinsert) {
res->ingress = want_ingress;
- res->qstats = this_cpu_ptr(m->common.cpu_qstats);
- skb_tc_reinsert(skb, res);
+ if (skb_tc_reinsert(skb, res))
+ tcf_action_inc_overlimit_qstats(&m->common);
__this_cpu_dec(mirred_rec_level);
return TC_ACT_CONSUMED;
}
@@ -303,7 +303,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
if (err) {
out:
- qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
+ tcf_action_inc_overlimit_qstats(&m->common);
if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
@@ -318,10 +318,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
struct tcf_mirred *m = to_mirred(a);
struct tcf_t *tm = &m->tcf_tm;
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- if (hw)
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
- bytes, packets);
+ tcf_action_update_stats(a, bytes, packets, false, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 4cf6c553bb0b..4d8c822b6aca 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -131,7 +131,8 @@ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
static int tcf_mpls_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mpls_net_id);
struct nlattr *tb[TCA_MPLS_MAX + 1];
@@ -224,7 +225,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
- &act_mpls_ops, bind, true);
+ &act_mpls_ops, bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index ea4c5359e7df..855a6fa16a62 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -36,7 +36,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
struct tc_action **a, int ovr, int bind,
bool rtnl_held, struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
struct nlattr *tb[TCA_NAT_MAX + 1];
@@ -61,7 +61,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
ret = tcf_idr_create(tn, index, est, a,
- &act_nat_ops, bind, false);
+ &act_nat_ops, bind, false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -206,9 +206,7 @@ static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a,
icmph = (void *)(skb_network_header(skb) + ihl);
- if ((icmph->type != ICMP_DEST_UNREACH) &&
- (icmph->type != ICMP_TIME_EXCEEDED) &&
- (icmph->type != ICMP_PARAMETERPROB))
+ if (!icmp_is_err(icmph->type))
break;
if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) +
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index cdfaa79382a2..d5eff6ac17a9 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -137,7 +137,8 @@ nla_failure:
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
@@ -190,7 +191,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
goto out_free;
}
ret = tcf_idr_create(tn, index, est, a,
- &act_pedit_ops, bind, false);
+ &act_pedit_ops, bind, false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
goto out_free;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 89c04c52af3d..d96271590268 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -47,7 +47,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
int ret = 0, tcfp_result = TC_ACT_OK, err, size;
@@ -87,7 +87,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, NULL, a,
- &act_police_ops, bind, true);
+ &act_police_ops, bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -294,10 +294,7 @@ static void tcf_police_stats_update(struct tc_action *a,
struct tcf_police *police = to_police(a);
struct tcf_t *tm = &police->tcf_tm;
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- if (hw)
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
- bytes, packets);
+ tcf_action_update_stats(a, bytes, packets, false, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
@@ -345,10 +342,7 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
nla_put_u32(skb, TCA_POLICE_AVRATE, p->tcfp_ewma_rate))
goto nla_put_failure;
- t.install = jiffies_to_clock_t(jiffies - police->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - police->tcf_tm.lastuse);
- t.firstuse = jiffies_to_clock_t(jiffies - police->tcf_tm.firstuse);
- t.expires = jiffies_to_clock_t(police->tcf_tm.expires);
+ tcf_tm_dump(&t, &police->tcf_tm);
if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD))
goto nla_put_failure;
spin_unlock_bh(&police->tcf_lock);
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 514456a0b9a8..29b23bfaf10d 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -36,7 +36,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
int bind, bool rtnl_held, struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@ -69,7 +69,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
- &act_sample_ops, bind, true);
+ &act_sample_ops, bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 6120e56117ca..9813ca4006dd 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -35,7 +35,7 @@ static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a,
* Example if this was the 3rd packet and the string was "hello"
* then it would look like "hello_3" (without quotes)
*/
- pr_info("simple: %s_%d\n",
+ pr_info("simple: %s_%llu\n",
(char *)d->tcfd_defdata, d->tcf_bstats.packets);
spin_unlock(&d->tcf_lock);
return d->tcf_action;
@@ -86,7 +86,8 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
@@ -127,7 +128,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
- &act_simp_ops, bind, false);
+ &act_simp_ops, bind, false, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6a8d3337c577..5f7ca7f89ca2 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -86,7 +86,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
@@ -165,7 +165,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
- &act_skbedit_ops, bind, true);
+ &act_skbedit_ops, bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 888437f97ba6..39e6d94cfafb 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -79,7 +79,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
@@ -143,7 +143,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
- &act_skbmod_ops, bind, true);
+ &act_skbmod_ops, bind, true, 0);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 2f83a79f76aa..cb34e5d57aaa 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -31,7 +31,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
params = rcu_dereference_bh(t->params);
tcf_lastuse_update(&t->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
+ tcf_action_update_bstats(&t->common, skb);
action = READ_ONCE(t->tcf_action);
switch (params->tcft_action) {
@@ -208,7 +208,7 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp,
+ struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
@@ -347,8 +347,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_tunnel_key_ops, bind, true);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_tunnel_key_ops, bind,
+ act_flags);
if (ret) {
NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
goto release_tun_meta;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 08aaf719a70f..b6939abc61eb 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -29,7 +29,7 @@ static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
u16 tci;
tcf_lastuse_update(&v->tcf_tm);
- bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
+ tcf_action_update_bstats(&v->common, skb);
/* Ensure 'data' points at mac_header prior calling vlan manipulating
* functions.
@@ -88,7 +88,7 @@ out:
return action;
drop:
- qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+ tcf_action_inc_drop_qstats(&v->common);
return TC_ACT_SHOT;
}
@@ -102,7 +102,8 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct tcf_proto *tp, struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
@@ -188,8 +189,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
action = parm->v_action;
if (!exists) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_vlan_ops, bind, true);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_vlan_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -307,10 +308,7 @@ static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u32 packets,
struct tcf_vlan *v = to_vlan(a);
struct tcf_t *tm = &v->tcf_tm;
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- if (hw)
- _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
- bytes, packets);
+ tcf_action_update_stats(a, bytes, packets, false, hw);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 3177dcb17316..d99966a55c84 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -521,7 +521,7 @@ META_COLLECTOR(int_sk_ack_bl)
*err = -1;
return;
}
- dst->value = sk->sk_ack_backlog;
+ dst->value = READ_ONCE(sk->sk_ack_backlog);
}
META_COLLECTOR(int_sk_max_ack_bl)
@@ -532,7 +532,7 @@ META_COLLECTOR(int_sk_max_ack_bl)
*err = -1;
return;
}
- dst->value = sk->sk_max_ack_backlog;
+ dst->value = READ_ONCE(sk->sk_max_ack_backlog);
}
META_COLLECTOR(int_sk_prio)
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 98dd87ce1510..b1c7e726ce5d 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -530,8 +530,7 @@ begin:
fq_flow_set_throttled(q, f);
goto begin;
}
- if (time_next_packet &&
- (s64)(now - time_next_packet - q->ce_threshold) > 0) {
+ if ((s64)(now - time_next_packet - q->ce_threshold) > 0) {
INET_ECN_set_ce(skb);
q->stat_ce_mark++;
}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index c261c0a18868..968519ff36e9 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -14,7 +14,6 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
-#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/netlink.h>
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8769b4b8807d..5ab696efca95 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -382,13 +382,8 @@ void __qdisc_run(struct Qdisc *q)
int packets;
while (qdisc_restart(q, &packets)) {
- /*
- * Ordered by possible occurrence: Postpone processing if
- * 1. we've exceeded packet quota
- * 2. another process needs the CPU;
- */
quota -= packets;
- if (quota <= 0 || need_resched()) {
+ if (quota <= 0) {
__netif_schedule(q);
break;
}
@@ -657,7 +652,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
if (likely(skb)) {
qdisc_update_stats_at_dequeue(qdisc, skb);
} else {
- qdisc->empty = true;
+ WRITE_ONCE(qdisc->empty, true);
}
return skb;
@@ -1214,8 +1209,13 @@ void dev_deactivate_many(struct list_head *head)
/* Wait for outstanding qdisc_run calls. */
list_for_each_entry(dev, head, close_list) {
- while (some_qdisc_is_busy(dev))
- yield();
+ while (some_qdisc_is_busy(dev)) {
+ /* wait_event() would avoid this sleep-loop but would
+ * require expensive checks in the fast paths of packet
+ * processing which isn't worth it.
+ */
+ schedule_timeout_uninterruptible(1);
+ }
/* The new qdisc is assigned at this point so we can safely
* unwind stale skb lists and qdisc statistics
*/
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index d2ffc9a0ba3a..8f8d18abd013 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -86,6 +86,8 @@ static struct sctp_association *sctp_association_init(
*/
asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt;
asoc->pf_retrans = sp->pf_retrans;
+ asoc->ps_retrans = sp->ps_retrans;
+ asoc->pf_expose = sp->pf_expose;
asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial);
asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max);
@@ -324,7 +326,7 @@ void sctp_association_free(struct sctp_association *asoc)
* socket.
*/
if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
}
/* Mark as dead, so other users can know this structure is
@@ -429,6 +431,8 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
changeover = 1 ;
asoc->peer.primary_path = transport;
+ sctp_ulpevent_nofity_peer_addr_change(transport,
+ SCTP_ADDR_MADE_PRIM, 0);
/* Set a default msg_name for events. */
memcpy(&asoc->peer.primary_addr, &transport->ipaddr,
@@ -569,6 +573,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
asoc->peer.transport_count--;
+ sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_REMOVED, 0);
sctp_transport_free(peer);
}
@@ -624,6 +629,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
/* And the partial failure retrans threshold */
peer->pf_retrans = asoc->pf_retrans;
+ /* And the primary path switchover retrans threshold */
+ peer->ps_retrans = asoc->ps_retrans;
/* Initialize the peer's SACK delay timeout based on the
* association configured value.
@@ -707,6 +714,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
asoc->peer.transport_count++;
+ sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_ADDED, 0);
+
/* If we do not yet have a primary path, set one. */
if (!asoc->peer.primary_path) {
sctp_assoc_set_primary(asoc, peer);
@@ -781,9 +790,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
enum sctp_transport_cmd command,
sctp_sn_error_t error)
{
- struct sctp_ulpevent *event;
- struct sockaddr_storage addr;
- int spc_state = 0;
+ int spc_state = SCTP_ADDR_AVAILABLE;
bool ulp_notify = true;
/* Record the transition on the transport. */
@@ -793,19 +800,13 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
* to heartbeat success, report the SCTP_ADDR_CONFIRMED
* state to the user, otherwise report SCTP_ADDR_AVAILABLE.
*/
- if (SCTP_UNCONFIRMED == transport->state &&
- SCTP_HEARTBEAT_SUCCESS == error)
- spc_state = SCTP_ADDR_CONFIRMED;
- else
- spc_state = SCTP_ADDR_AVAILABLE;
- /* Don't inform ULP about transition from PF to
- * active state and set cwnd to 1 MTU, see SCTP
- * Quick failover draft section 5.1, point 5
- */
- if (transport->state == SCTP_PF) {
+ if (transport->state == SCTP_PF &&
+ asoc->pf_expose != SCTP_PF_EXPOSE_ENABLE)
ulp_notify = false;
- transport->cwnd = asoc->pathmtu;
- }
+ else if (transport->state == SCTP_UNCONFIRMED &&
+ error == SCTP_HEARTBEAT_SUCCESS)
+ spc_state = SCTP_ADDR_CONFIRMED;
+
transport->state = SCTP_ACTIVE;
break;
@@ -814,19 +815,21 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
* to inactive state. Also, release the cached route since
* there may be a better route next time.
*/
- if (transport->state != SCTP_UNCONFIRMED)
+ if (transport->state != SCTP_UNCONFIRMED) {
transport->state = SCTP_INACTIVE;
- else {
+ spc_state = SCTP_ADDR_UNREACHABLE;
+ } else {
sctp_transport_dst_release(transport);
ulp_notify = false;
}
-
- spc_state = SCTP_ADDR_UNREACHABLE;
break;
case SCTP_TRANSPORT_PF:
transport->state = SCTP_PF;
- ulp_notify = false;
+ if (asoc->pf_expose != SCTP_PF_EXPOSE_ENABLE)
+ ulp_notify = false;
+ else
+ spc_state = SCTP_ADDR_POTENTIALLY_FAILED;
break;
default:
@@ -836,16 +839,9 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
/* Generate and send a SCTP_PEER_ADDR_CHANGE notification
* to the user.
*/
- if (ulp_notify) {
- memset(&addr, 0, sizeof(struct sockaddr_storage));
- memcpy(&addr, &transport->ipaddr,
- transport->af_specific->sockaddr_len);
-
- event = sctp_ulpevent_make_peer_addr_change(asoc, &addr,
- 0, spc_state, error, GFP_ATOMIC);
- if (event)
- asoc->stream.si->enqueue_event(&asoc->ulpq, event);
- }
+ if (ulp_notify)
+ sctp_ulpevent_nofity_peer_addr_change(transport,
+ spc_state, error);
/* Select new active and retran paths. */
sctp_select_active_and_retran_path(asoc);
@@ -1077,7 +1073,7 @@ void sctp_assoc_migrate(struct sctp_association *assoc, struct sock *newsk)
/* Decrement the backlog value for a TCP-style socket. */
if (sctp_style(oldsk, TCP))
- oldsk->sk_ack_backlog--;
+ sk_acceptq_removed(oldsk);
/* Release references to the old endpoint and the sock. */
sctp_endpoint_put(assoc->ep);
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index cc0405c79dfc..cc3ce5d80b08 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -75,41 +75,39 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
struct list_head *pos, *temp;
struct sctp_chunk *chunk;
struct sctp_ulpevent *ev;
- int error = 0, notify;
-
- /* If we failed, we may need to notify. */
- notify = msg->send_failed ? -1 : 0;
+ int error, sent;
/* Release all references. */
list_for_each_safe(pos, temp, &msg->chunks) {
list_del_init(pos);
chunk = list_entry(pos, struct sctp_chunk, frag_list);
- /* Check whether we _really_ need to notify. */
- if (notify < 0) {
- asoc = chunk->asoc;
- if (msg->send_error)
- error = msg->send_error;
- else
- error = asoc->outqueue.error;
-
- notify = sctp_ulpevent_type_enabled(asoc->subscribe,
- SCTP_SEND_FAILED);
+
+ if (!msg->send_failed) {
+ sctp_chunk_put(chunk);
+ continue;
}
- /* Generate a SEND FAILED event only if enabled. */
- if (notify > 0) {
- int sent;
- if (chunk->has_tsn)
- sent = SCTP_DATA_SENT;
- else
- sent = SCTP_DATA_UNSENT;
+ asoc = chunk->asoc;
+ error = msg->send_error ?: asoc->outqueue.error;
+ sent = chunk->has_tsn ? SCTP_DATA_SENT : SCTP_DATA_UNSENT;
+ if (sctp_ulpevent_type_enabled(asoc->subscribe,
+ SCTP_SEND_FAILED)) {
ev = sctp_ulpevent_make_send_failed(asoc, chunk, sent,
error, GFP_ATOMIC);
if (ev)
asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
}
+ if (sctp_ulpevent_type_enabled(asoc->subscribe,
+ SCTP_SEND_FAILED_EVENT)) {
+ ev = sctp_ulpevent_make_send_failed_event(asoc, chunk,
+ sent, error,
+ GFP_ATOMIC);
+ if (ev)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+ }
+
sctp_chunk_put(chunk);
}
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 0851166b9175..8a15146faaeb 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -425,8 +425,8 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
r->idiag_rqueue = atomic_read(&infox->asoc->rmem_alloc);
r->idiag_wqueue = infox->asoc->sndbuf_used;
} else {
- r->idiag_rqueue = sk->sk_ack_backlog;
- r->idiag_wqueue = sk->sk_max_ack_backlog;
+ r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
+ r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
}
if (infox->sctpinfo)
sctp_get_sctp_info(sk, infox->asoc, infox->sctpinfo);
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index ea53049d1db6..9d05b2e7bce2 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -164,7 +164,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep,
/* Increment the backlog value for a TCP-style listening socket. */
if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
}
/* Free the endpoint structure. Delay cleanup until
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 08d14d86ecfb..fbbf19128c2d 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1217,9 +1217,15 @@ static int __net_init sctp_defaults_init(struct net *net)
/* Max.Burst - 4 */
net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST;
+ /* Disable of Primary Path Switchover by default */
+ net->sctp.ps_retrans = SCTP_PS_RETRANS_MAX;
+
/* Enable pf state by default */
net->sctp.pf_enable = 1;
+ /* Ignore pf exposure feature by default */
+ net->sctp.pf_expose = SCTP_PF_EXPOSE_UNSET;
+
/* Association.Max.Retrans - 10 attempts
* Path.Max.Retrans - 5 attempts (per destination address)
* Max.Init.Retransmits - 8 attempts
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index e52b2128e43b..acd737d4c0e0 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -567,6 +567,11 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands,
SCTP_FAILED_THRESHOLD);
}
+ if (transport->error_count > transport->ps_retrans &&
+ asoc->peer.primary_path == transport &&
+ asoc->peer.active_path != transport)
+ sctp_assoc_set_primary(asoc, asoc->peer.active_path);
+
/* E2) For the destination address for which the timer
* expires, set RTO <- RTO * 2 ("back off the timer"). The
* maximum value discussed in rule C7 above (RTO.max) may be
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ffd3262b7a41..83e4ca1fabda 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3943,18 +3943,22 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
*/
static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
char __user *optval,
- unsigned int optlen)
+ unsigned int optlen, bool v2)
{
- struct sctp_paddrthlds val;
+ struct sctp_paddrthlds_v2 val;
struct sctp_transport *trans;
struct sctp_association *asoc;
+ int len;
- if (optlen < sizeof(struct sctp_paddrthlds))
+ len = v2 ? sizeof(val) : sizeof(struct sctp_paddrthlds);
+ if (optlen < len)
return -EINVAL;
- if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval,
- sizeof(struct sctp_paddrthlds)))
+ if (copy_from_user(&val, optval, len))
return -EFAULT;
+ if (v2 && val.spt_pathpfthld > val.spt_pathcpthld)
+ return -EINVAL;
+
if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
trans = sctp_addr_id2transport(sk, &val.spt_address,
val.spt_assoc_id);
@@ -3963,6 +3967,8 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
if (val.spt_pathmaxrxt)
trans->pathmaxrxt = val.spt_pathmaxrxt;
+ if (v2)
+ trans->ps_retrans = val.spt_pathcpthld;
trans->pf_retrans = val.spt_pathpfthld;
return 0;
@@ -3978,17 +3984,23 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
transports) {
if (val.spt_pathmaxrxt)
trans->pathmaxrxt = val.spt_pathmaxrxt;
+ if (v2)
+ trans->ps_retrans = val.spt_pathcpthld;
trans->pf_retrans = val.spt_pathpfthld;
}
if (val.spt_pathmaxrxt)
asoc->pathmaxrxt = val.spt_pathmaxrxt;
+ if (v2)
+ asoc->ps_retrans = val.spt_pathcpthld;
asoc->pf_retrans = val.spt_pathpfthld;
} else {
struct sctp_sock *sp = sctp_sk(sk);
if (val.spt_pathmaxrxt)
sp->pathmaxrxt = val.spt_pathmaxrxt;
+ if (v2)
+ sp->ps_retrans = val.spt_pathcpthld;
sp->pf_retrans = val.spt_pathpfthld;
}
@@ -4589,6 +4601,40 @@ out:
return retval;
}
+static int sctp_setsockopt_pf_expose(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (params.assoc_value > SCTP_PF_EXPOSE_MAX)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP))
+ goto out;
+
+ if (asoc)
+ asoc->pf_expose = params.assoc_value;
+ else
+ sctp_sk(sk)->pf_expose = params.assoc_value;
+ retval = 0;
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4744,7 +4790,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
break;
case SCTP_PEER_ADDR_THLDS:
- retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen);
+ retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen,
+ false);
+ break;
+ case SCTP_PEER_ADDR_THLDS_V2:
+ retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen,
+ true);
break;
case SCTP_RECVRCVINFO:
retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen);
@@ -4798,6 +4849,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_ECN_SUPPORTED:
retval = sctp_setsockopt_ecn_supported(sk, optval, optlen);
break;
+ case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE:
+ retval = sctp_setsockopt_pf_expose(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -5041,6 +5095,8 @@ static int sctp_init_sock(struct sock *sk)
sp->hbinterval = net->sctp.hb_interval;
sp->pathmaxrxt = net->sctp.max_retrans_path;
sp->pf_retrans = net->sctp.pf_retrans;
+ sp->ps_retrans = net->sctp.ps_retrans;
+ sp->pf_expose = net->sctp.pf_expose;
sp->pathmtu = 0; /* allow default discovery */
sp->sackdelay = net->sctp.sack_timeout;
sp->sackfreq = 2;
@@ -5521,8 +5577,16 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len,
transport = sctp_addr_id2transport(sk, &pinfo.spinfo_address,
pinfo.spinfo_assoc_id);
- if (!transport)
- return -EINVAL;
+ if (!transport) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (transport->state == SCTP_PF &&
+ transport->asoc->pf_expose == SCTP_PF_EXPOSE_DISABLE) {
+ retval = -EACCES;
+ goto out;
+ }
pinfo.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
pinfo.spinfo_state = transport->state;
@@ -7170,18 +7234,19 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
* http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
*/
static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
- char __user *optval,
- int len,
- int __user *optlen)
+ char __user *optval, int len,
+ int __user *optlen, bool v2)
{
- struct sctp_paddrthlds val;
+ struct sctp_paddrthlds_v2 val;
struct sctp_transport *trans;
struct sctp_association *asoc;
+ int min;
- if (len < sizeof(struct sctp_paddrthlds))
+ min = v2 ? sizeof(val) : sizeof(struct sctp_paddrthlds);
+ if (len < min)
return -EINVAL;
- len = sizeof(struct sctp_paddrthlds);
- if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len))
+ len = min;
+ if (copy_from_user(&val, optval, len))
return -EFAULT;
if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
@@ -7192,6 +7257,7 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
val.spt_pathmaxrxt = trans->pathmaxrxt;
val.spt_pathpfthld = trans->pf_retrans;
+ val.spt_pathcpthld = trans->ps_retrans;
goto out;
}
@@ -7204,11 +7270,13 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
if (asoc) {
val.spt_pathpfthld = asoc->pf_retrans;
val.spt_pathmaxrxt = asoc->pathmaxrxt;
+ val.spt_pathcpthld = asoc->ps_retrans;
} else {
struct sctp_sock *sp = sctp_sk(sk);
val.spt_pathpfthld = sp->pf_retrans;
val.spt_pathmaxrxt = sp->pathmaxrxt;
+ val.spt_pathcpthld = sp->ps_retrans;
}
out:
@@ -7900,6 +7968,45 @@ out:
return retval;
}
+static int sctp_getsockopt_pf_expose(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ params.assoc_value = asoc ? asoc->pf_expose
+ : sctp_sk(sk)->pf_expose;
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -8049,7 +8156,12 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen);
break;
case SCTP_PEER_ADDR_THLDS:
- retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, optlen);
+ retval = sctp_getsockopt_paddr_thresholds(sk, optval, len,
+ optlen, false);
+ break;
+ case SCTP_PEER_ADDR_THLDS_V2:
+ retval = sctp_getsockopt_paddr_thresholds(sk, optval, len,
+ optlen, true);
break;
case SCTP_GET_ASSOC_STATS:
retval = sctp_getsockopt_assoc_stats(sk, len, optval, optlen);
@@ -8112,6 +8224,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_ECN_SUPPORTED:
retval = sctp_getsockopt_ecn_supported(sk, len, optval, optlen);
break;
+ case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE:
+ retval = sctp_getsockopt_pf_expose(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -8376,7 +8491,7 @@ static int sctp_listen_start(struct sock *sk, int backlog)
}
}
- sk->sk_max_ack_backlog = backlog;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
return sctp_hash_endpoint(ep);
}
@@ -8430,7 +8545,7 @@ int sctp_inet_listen(struct socket *sock, int backlog)
/* If we are already listening, just update the backlog */
if (sctp_sstate(sk, LISTENING))
- sk->sk_max_ack_backlog = backlog;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
else {
err = sctp_listen_start(sk, backlog);
if (err)
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 238cf1737576..4740aa70e652 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -34,6 +34,8 @@ static int rto_alpha_min = 0;
static int rto_beta_min = 0;
static int rto_alpha_max = 1000;
static int rto_beta_max = 1000;
+static int pf_expose_max = SCTP_PF_EXPOSE_MAX;
+static int ps_retrans_max = SCTP_PS_RETRANS_MAX;
static unsigned long max_autoclose_min = 0;
static unsigned long max_autoclose_max =
@@ -212,7 +214,16 @@ static struct ctl_table sctp_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_INT_MAX,
+ .extra2 = &init_net.sctp.ps_retrans,
+ },
+ {
+ .procname = "ps_retrans",
+ .data = &init_net.sctp.ps_retrans,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &init_net.sctp.pf_retrans,
+ .extra2 = &ps_retrans_max,
},
{
.procname = "sndbuf_policy",
@@ -318,6 +329,15 @@ static struct ctl_table sctp_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "pf_expose",
+ .data = &init_net.sctp.pf_expose,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &pf_expose_max,
+ },
{ /* sentinel */ }
};
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index e0cc1edf49a0..c82dbdcf13f2 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -238,7 +238,7 @@ fail:
* When a destination address on a multi-homed peer encounters a change
* an interface details event is sent.
*/
-struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
+static struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
const struct sctp_association *asoc,
const struct sockaddr_storage *aaddr,
int flags, int state, int error, gfp_t gfp)
@@ -336,6 +336,22 @@ fail:
return NULL;
}
+void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport,
+ int state, int error)
+{
+ struct sctp_association *asoc = transport->asoc;
+ struct sockaddr_storage addr;
+ struct sctp_ulpevent *event;
+
+ memset(&addr, 0, sizeof(struct sockaddr_storage));
+ memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len);
+
+ event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, 0, state,
+ error, GFP_ATOMIC);
+ if (event)
+ asoc->stream.si->enqueue_event(&asoc->ulpq, event);
+}
+
/* Create and initialize an SCTP_REMOTE_ERROR notification.
*
* Note: This assumes that the chunk->skb->data already points to the
@@ -511,6 +527,45 @@ fail:
return NULL;
}
+struct sctp_ulpevent *sctp_ulpevent_make_send_failed_event(
+ const struct sctp_association *asoc, struct sctp_chunk *chunk,
+ __u16 flags, __u32 error, gfp_t gfp)
+{
+ struct sctp_send_failed_event *ssf;
+ struct sctp_ulpevent *event;
+ struct sk_buff *skb;
+ int len;
+
+ skb = skb_copy_expand(chunk->skb, sizeof(*ssf), 0, gfp);
+ if (!skb)
+ return NULL;
+
+ len = ntohs(chunk->chunk_hdr->length);
+ len -= sctp_datachk_len(&asoc->stream);
+
+ skb_pull(skb, sctp_datachk_len(&asoc->stream));
+ event = sctp_skb2event(skb);
+ sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
+
+ ssf = skb_push(skb, sizeof(*ssf));
+ ssf->ssf_type = SCTP_SEND_FAILED_EVENT;
+ ssf->ssf_flags = flags;
+ ssf->ssf_length = sizeof(*ssf) + len;
+ skb_trim(skb, ssf->ssf_length);
+ ssf->ssf_error = error;
+
+ ssf->ssfe_info.snd_sid = chunk->sinfo.sinfo_stream;
+ ssf->ssfe_info.snd_ppid = chunk->sinfo.sinfo_ppid;
+ ssf->ssfe_info.snd_context = chunk->sinfo.sinfo_context;
+ ssf->ssfe_info.snd_assoc_id = chunk->sinfo.sinfo_assoc_id;
+ ssf->ssfe_info.snd_flags = chunk->chunk_hdr->flags;
+
+ sctp_ulpevent_set_owner(event, asoc);
+ ssf->ssf_assoc_id = sctp_assoc2id(asoc);
+
+ return event;
+}
+
/* Create and initialize a SCTP_SHUTDOWN_EVENT notification.
*
* Socket Extensions for SCTP - draft-01
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 47946f489fd4..b7d9fd285c71 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -174,6 +174,7 @@ static int smc_release(struct socket *sock)
if (!sk)
goto out;
+ sock_hold(sk); /* sock_put below */
smc = smc_sk(sk);
/* cleanup for a dangling non-blocking connect */
@@ -196,6 +197,7 @@ static int smc_release(struct socket *sock)
sock->sk = NULL;
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
sock_put(sk); /* final sock_put */
out:
return rc;
@@ -977,12 +979,14 @@ void smc_close_non_accepted(struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
+ sock_hold(sk); /* sock_put below */
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
__smc_release(smc);
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
sock_put(sk); /* final sock_put */
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 878313f8d6c1..be11ba41190f 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -188,6 +188,7 @@ struct smc_connection {
* 0 for SMC-R, 32 for SMC-D
*/
u64 peer_token; /* SMC-D token of peer */
+ u8 killed : 1; /* abnormal termination */
};
struct smc_sock { /* smc sock container */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index d0b0f4c865b4..7dc07ec2379b 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -63,7 +63,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
wr_rdma_buf,
(struct smc_wr_tx_pend_priv **)pend);
- if (!conn->alert_token_local)
+ if (conn->killed)
/* abnormal termination */
rc = -EPIPE;
return rc;
@@ -328,7 +328,7 @@ static void smcd_cdc_rx_tsklet(unsigned long data)
struct smcd_cdc_msg cdc;
struct smc_sock *smc;
- if (!conn)
+ if (!conn || conn->killed)
return;
data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr;
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index fc06720b53c1..d34e5adce2eb 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -13,6 +13,7 @@
#include <linux/sched/signal.h>
#include <net/sock.h>
+#include <net/tcp.h>
#include "smc.h"
#include "smc_tx.h"
@@ -65,8 +66,9 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
rc = sk_wait_event(sk, &timeout,
!smc_tx_prepared_sends(&smc->conn) ||
- (sk->sk_err == ECONNABORTED) ||
- (sk->sk_err == ECONNRESET),
+ sk->sk_err == ECONNABORTED ||
+ sk->sk_err == ECONNRESET ||
+ smc->conn.killed,
&wait);
if (rc)
break;
@@ -95,11 +97,13 @@ static int smc_close_final(struct smc_connection *conn)
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
else
conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
+ if (conn->killed)
+ return -EPIPE;
return smc_cdc_get_slot_and_msg_send(conn);
}
-static int smc_close_abort(struct smc_connection *conn)
+int smc_close_abort(struct smc_connection *conn)
{
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
@@ -109,19 +113,15 @@ static int smc_close_abort(struct smc_connection *conn)
/* terminate smc socket abnormally - active abort
* link group is terminated, i.e. RDMA communication no longer possible
*/
-static void smc_close_active_abort(struct smc_sock *smc)
+void smc_close_active_abort(struct smc_sock *smc)
{
struct sock *sk = &smc->sk;
-
- struct smc_cdc_conn_state_flags *txflags =
- &smc->conn.local_tx_ctrl.conn_state_flags;
+ bool release_clcsock = false;
if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
sk->sk_err = ECONNABORTED;
- if (smc->clcsock && smc->clcsock->sk) {
- smc->clcsock->sk->sk_err = ECONNABORTED;
- smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
- }
+ if (smc->clcsock && smc->clcsock->sk)
+ tcp_abort(smc->clcsock->sk, ECONNABORTED);
}
switch (sk->sk_state) {
case SMC_ACTIVE:
@@ -129,35 +129,29 @@ static void smc_close_active_abort(struct smc_sock *smc)
release_sock(sk);
cancel_delayed_work_sync(&smc->conn.tx_work);
lock_sock(sk);
+ sk->sk_state = SMC_CLOSED;
sock_put(sk); /* passive closing */
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
- if (!smc_cdc_rxed_any_close(&smc->conn))
- sk->sk_state = SMC_PEERABORTWAIT;
- else
- sk->sk_state = SMC_CLOSED;
release_sock(sk);
cancel_delayed_work_sync(&smc->conn.tx_work);
lock_sock(sk);
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* postponed passive closing */
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
- if (!txflags->peer_conn_closed) {
- /* just SHUTDOWN_SEND done */
- sk->sk_state = SMC_PEERABORTWAIT;
- } else {
- sk->sk_state = SMC_CLOSED;
- }
+ case SMC_PEERFINCLOSEWAIT:
+ sk->sk_state = SMC_CLOSED;
+ smc_conn_free(&smc->conn);
+ release_clcsock = true;
sock_put(sk); /* passive closing */
break;
case SMC_PROCESSABORT:
case SMC_APPFINCLOSEWAIT:
sk->sk_state = SMC_CLOSED;
break;
- case SMC_PEERFINCLOSEWAIT:
- sock_put(sk); /* passive closing */
- break;
case SMC_INIT:
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
@@ -166,6 +160,12 @@ static void smc_close_active_abort(struct smc_sock *smc)
sock_set_flag(sk, SOCK_DEAD);
sk->sk_state_change(sk);
+
+ if (release_clcsock) {
+ release_sock(sk);
+ smc_clcsock_release(smc);
+ lock_sock(sk);
+ }
}
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
@@ -215,8 +215,6 @@ again:
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
rc = smc_close_final(conn);
- if (rc)
- break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
} else {
/* peer event has changed the state */
@@ -229,8 +227,6 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
- if (rc)
- break;
}
sk->sk_state = SMC_CLOSED;
break;
@@ -246,8 +242,6 @@ again:
goto again;
/* confirm close from peer */
rc = smc_close_final(conn);
- if (rc)
- break;
if (smc_cdc_rxed_any_close(conn)) {
/* peer has closed the socket already */
sk->sk_state = SMC_CLOSED;
@@ -263,8 +257,6 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
- if (rc)
- break;
}
/* peer sending PeerConnectionClosed will cause transition */
break;
@@ -272,10 +264,12 @@ again:
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
- smc_close_abort(conn);
+ rc = smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERABORTWAIT:
+ sk->sk_state = SMC_CLOSED;
+ break;
case SMC_CLOSED:
/* nothing to do, add tracing in future patch */
break;
@@ -344,12 +338,6 @@ static void smc_close_passive_work(struct work_struct *work)
lock_sock(sk);
old_state = sk->sk_state;
- if (!conn->alert_token_local) {
- /* abnormal termination */
- smc_close_active_abort(smc);
- goto wakeup;
- }
-
rxflags = &conn->local_rx_ctrl.conn_state_flags;
if (rxflags->peer_conn_abort) {
/* peer has not received all data */
@@ -451,8 +439,6 @@ again:
goto again;
/* send close wr request */
rc = smc_close_wr(conn);
- if (rc)
- break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
break;
case SMC_APPCLOSEWAIT1:
@@ -466,8 +452,6 @@ again:
goto again;
/* confirm close from peer */
rc = smc_close_wr(conn);
- if (rc)
- break;
sk->sk_state = SMC_APPCLOSEWAIT2;
break;
case SMC_APPCLOSEWAIT2:
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index e0e3b5df25d2..634fea2b7c95 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -24,5 +24,7 @@ int smc_close_active(struct smc_sock *smc);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
void smc_clcsock_release(struct smc_sock *smc);
+int smc_close_abort(struct smc_connection *conn);
+void smc_close_active_abort(struct smc_sock *smc);
#endif /* SMC_CLOSE_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2ba97ff325a5..0d92456729ab 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -42,20 +42,40 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc);
+/* return head of link group list and its lock for a given link group */
+static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
+ spinlock_t **lgr_lock)
+{
+ if (lgr->is_smcd) {
+ *lgr_lock = &lgr->smcd->lgr_lock;
+ return &lgr->smcd->lgr_list;
+ }
+
+ *lgr_lock = &smc_lgr_list.lock;
+ return &smc_lgr_list.list;
+}
+
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
/* client link group creation always follows the server link group
* creation. For client use a somewhat higher removal delay time,
* otherwise there is a risk of out-of-sync link groups.
*/
- mod_delayed_work(system_wq, &lgr->free_work,
- (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
- SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
+ if (!lgr->freeing && !lgr->freefast) {
+ mod_delayed_work(system_wq, &lgr->free_work,
+ (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
+ SMC_LGR_FREE_DELAY_CLNT :
+ SMC_LGR_FREE_DELAY_SERV);
+ }
}
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
- mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
+ if (!lgr->freeing && !lgr->freefast) {
+ lgr->freefast = 1;
+ mod_delayed_work(system_wq, &lgr->free_work,
+ SMC_LGR_FREE_DELAY_FAST);
+ }
}
/* Register connection's alert token in our lookup structure.
@@ -134,6 +154,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
+ conn->lgr = NULL;
}
/* Send delete link, either as client to request the initiation
@@ -157,48 +178,62 @@ static void smc_lgr_free_work(struct work_struct *work)
struct smc_link_group *lgr = container_of(to_delayed_work(work),
struct smc_link_group,
free_work);
+ spinlock_t *lgr_lock;
+ struct smc_link *lnk;
bool conns;
- spin_lock_bh(&smc_lgr_list.lock);
+ smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
+ if (lgr->freeing) {
+ spin_unlock_bh(lgr_lock);
+ return;
+ }
read_lock_bh(&lgr->conns_lock);
conns = RB_EMPTY_ROOT(&lgr->conns_all);
read_unlock_bh(&lgr->conns_lock);
if (!conns) { /* number of lgr connections is no longer zero */
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_unlock_bh(lgr_lock);
return;
}
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list); /* remove from smc_lgr_list */
- spin_unlock_bh(&smc_lgr_list.lock);
+ list_del_init(&lgr->list); /* remove from smc_lgr_list */
+ lnk = &lgr->lnk[SMC_SINGLE_LINK];
if (!lgr->is_smcd && !lgr->terminating) {
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
-
/* try to send del link msg, on error free lgr immediately */
if (lnk->state == SMC_LNK_ACTIVE &&
!smc_link_send_delete(lnk)) {
/* reschedule in case we never receive a response */
smc_lgr_schedule_free_work(lgr);
+ spin_unlock_bh(lgr_lock);
return;
}
}
+ lgr->freeing = 1; /* this instance does the freeing, no new schedule */
+ spin_unlock_bh(lgr_lock);
+ cancel_delayed_work(&lgr->free_work);
- if (!delayed_work_pending(&lgr->free_work)) {
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
+ smc_llc_link_inactive(lnk);
+ if (lgr->is_smcd)
+ smc_ism_signal_shutdown(lgr);
+ smc_lgr_free(lgr);
+}
- if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
- smc_llc_link_inactive(lnk);
- if (lgr->is_smcd)
- smc_ism_signal_shutdown(lgr);
- smc_lgr_free(lgr);
- }
+static void smc_lgr_terminate_work(struct work_struct *work)
+{
+ struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+ terminate_work);
+
+ smc_lgr_terminate(lgr);
}
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_link_group *lgr;
+ struct list_head *lgr_list;
struct smc_link *lnk;
+ spinlock_t *lgr_lock;
u8 rndvec[3];
int rc = 0;
int i;
@@ -217,6 +252,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
}
lgr->is_smcd = ini->is_smcd;
lgr->sync_err = 0;
+ lgr->terminating = 0;
+ lgr->freefast = 0;
+ lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
rwlock_init(&lgr->sndbufs_lock);
rwlock_init(&lgr->rmbs_lock);
@@ -228,13 +266,18 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
smc_lgr_list.num += SMC_LGR_NUM_INCR;
memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
+ INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
lgr->conns_all = RB_ROOT;
if (ini->is_smcd) {
/* SMC-D specific settings */
+ get_device(&ini->ism_dev->dev);
lgr->peer_gid = ini->ism_gid;
lgr->smcd = ini->ism_dev;
+ lgr_list = &ini->ism_dev->lgr_list;
+ lgr_lock = &lgr->smcd->lgr_lock;
} else {
/* SMC-R specific settings */
+ get_device(&ini->ib_dev->ibdev->dev);
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
SMC_SYSTEMID_LEN);
@@ -245,6 +288,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lnk->link_id = SMC_SINGLE_LINK;
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
+ lgr_list = &smc_lgr_list.list;
+ lgr_lock = &smc_lgr_list.lock;
lnk->path_mtu =
ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
if (!ini->ib_dev->initialized)
@@ -274,9 +319,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
goto destroy_qp;
}
smc->conn.lgr = lgr;
- spin_lock_bh(&smc_lgr_list.lock);
- list_add(&lgr->list, &smc_lgr_list.list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_lock_bh(lgr_lock);
+ list_add(&lgr->list, lgr_list);
+ spin_unlock_bh(lgr_lock);
return 0;
destroy_qp:
@@ -309,7 +354,7 @@ static void smc_buf_unuse(struct smc_connection *conn,
conn->sndbuf_desc->used = 0;
if (conn->rmb_desc) {
if (!conn->rmb_desc->regerr) {
- if (!lgr->is_smcd) {
+ if (!lgr->is_smcd && !list_empty(&lgr->list)) {
/* unregister rmb with peer */
smc_llc_do_delete_rkey(
&lgr->lnk[SMC_SINGLE_LINK],
@@ -340,9 +385,10 @@ void smc_conn_free(struct smc_connection *conn)
} else {
smc_cdc_tx_dismiss_slots(conn);
}
- smc_lgr_unregister_conn(conn);
- smc_buf_unuse(conn, lgr); /* allow buffer reuse */
- conn->lgr = NULL;
+ if (!list_empty(&lgr->list)) {
+ smc_lgr_unregister_conn(conn);
+ smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+ }
if (!lgr->conns_num)
smc_lgr_schedule_free_work(lgr);
@@ -433,23 +479,50 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
static void smc_lgr_free(struct smc_link_group *lgr)
{
smc_lgr_free_bufs(lgr);
- if (lgr->is_smcd)
+ if (lgr->is_smcd) {
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
- else
+ put_device(&lgr->smcd->dev);
+ } else {
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
+ put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
+ }
kfree(lgr);
}
void smc_lgr_forget(struct smc_link_group *lgr)
{
- spin_lock_bh(&smc_lgr_list.lock);
+ struct list_head *lgr_list;
+ spinlock_t *lgr_lock;
+
+ lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
/* do not use this link group for new connections */
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ if (!list_empty(lgr_list))
+ list_del_init(lgr_list);
+ spin_unlock_bh(lgr_lock);
+}
+
+static void smc_sk_wake_ups(struct smc_sock *smc)
+{
+ smc->sk.sk_write_space(&smc->sk);
+ smc->sk.sk_data_ready(&smc->sk);
+ smc->sk.sk_state_change(&smc->sk);
}
-/* terminate linkgroup abnormally */
+/* kill a connection */
+static void smc_conn_kill(struct smc_connection *conn)
+{
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+ smc_close_abort(conn);
+ conn->killed = 1;
+ smc_sk_wake_ups(smc);
+ smc_lgr_unregister_conn(conn);
+ smc->sk.sk_err = ECONNABORTED;
+ smc_close_active_abort(smc);
+}
+
+/* terminate link group */
static void __smc_lgr_terminate(struct smc_link_group *lgr)
{
struct smc_connection *conn;
@@ -459,52 +532,65 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
if (lgr->terminating)
return; /* lgr already terminating */
lgr->terminating = 1;
- if (!list_empty(&lgr->list)) /* forget lgr */
- list_del_init(&lgr->list);
if (!lgr->is_smcd)
smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
- write_lock_bh(&lgr->conns_lock);
+ /* kill remaining link group connections */
+ read_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
while (node) {
+ read_unlock_bh(&lgr->conns_lock);
conn = rb_entry(node, struct smc_connection, alert_node);
smc = container_of(conn, struct smc_sock, conn);
- sock_hold(&smc->sk); /* sock_put in close work */
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
- __smc_lgr_unregister_conn(conn);
- conn->lgr = NULL;
- write_unlock_bh(&lgr->conns_lock);
- if (!schedule_work(&conn->close_work))
- sock_put(&smc->sk);
- write_lock_bh(&lgr->conns_lock);
+ sock_hold(&smc->sk); /* sock_put below */
+ lock_sock(&smc->sk);
+ smc_conn_kill(conn);
+ release_sock(&smc->sk);
+ sock_put(&smc->sk); /* sock_hold above */
+ read_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
}
- write_unlock_bh(&lgr->conns_lock);
+ read_unlock_bh(&lgr->conns_lock);
if (!lgr->is_smcd)
wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
- smc_lgr_schedule_free_work(lgr);
+ smc_lgr_schedule_free_work_fast(lgr);
}
+/* unlink and terminate link group */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
- spin_lock_bh(&smc_lgr_list.lock);
+ spinlock_t *lgr_lock;
+
+ smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
+ if (lgr->terminating) {
+ spin_unlock_bh(lgr_lock);
+ return; /* lgr already terminating */
+ }
+ list_del_init(&lgr->list);
+ spin_unlock_bh(lgr_lock);
__smc_lgr_terminate(lgr);
- spin_unlock_bh(&smc_lgr_list.lock);
}
/* Called when IB port is terminated */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
struct smc_link_group *lgr, *l;
+ LIST_HEAD(lgr_free_list);
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (!lgr->is_smcd &&
lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
- __smc_lgr_terminate(lgr);
+ list_move(&lgr->list, &lgr_free_list);
}
spin_unlock_bh(&smc_lgr_list.lock);
+
+ list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
+ list_del_init(&lgr->list);
+ __smc_lgr_terminate(lgr);
+ }
}
/* Called when SMC-D device is terminated or peer is lost */
@@ -514,20 +600,19 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
LIST_HEAD(lgr_free_list);
/* run common cleanup function and build free list */
- spin_lock_bh(&smc_lgr_list.lock);
- list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
- if (lgr->is_smcd && lgr->smcd == dev &&
- (!peer_gid || lgr->peer_gid == peer_gid) &&
+ spin_lock_bh(&dev->lgr_lock);
+ list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
+ if ((!peer_gid || lgr->peer_gid == peer_gid) &&
(vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
- __smc_lgr_terminate(lgr);
list_move(&lgr->list, &lgr_free_list);
}
}
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_unlock_bh(&dev->lgr_lock);
/* cancel the regular free workers and actually free lgrs */
list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
list_del_init(&lgr->list);
+ __smc_lgr_terminate(lgr);
cancel_delayed_work_sync(&lgr->free_work);
if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
smc_ism_signal_shutdown(lgr);
@@ -607,10 +692,14 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
+ struct list_head *lgr_list;
struct smc_link_group *lgr;
enum smc_lgr_role role;
+ spinlock_t *lgr_lock;
int rc = 0;
+ lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
+ lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
ini->cln_first_contact = SMC_FIRST_CONTACT;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
if (role == SMC_CLNT && ini->srv_first_contact)
@@ -618,8 +707,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
goto create;
/* determine if an existing link group can be reused */
- spin_lock_bh(&smc_lgr_list.lock);
- list_for_each_entry(lgr, &smc_lgr_list.list, list) {
+ spin_lock_bh(lgr_lock);
+ list_for_each_entry(lgr, lgr_list, list) {
write_lock_bh(&lgr->conns_lock);
if ((ini->is_smcd ?
smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
@@ -639,7 +728,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
}
write_unlock_bh(&lgr->conns_lock);
}
- spin_unlock_bh(&smc_lgr_list.lock);
+ spin_unlock_bh(lgr_lock);
if (role == SMC_CLNT && !ini->srv_first_contact &&
ini->cln_first_contact == SMC_FIRST_CONTACT) {
@@ -1027,16 +1116,45 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
return 0;
}
+static void smc_core_going_away(void)
+{
+ struct smc_ib_device *smcibdev;
+ struct smcd_dev *smcd;
+
+ spin_lock(&smc_ib_devices.lock);
+ list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
+ int i;
+
+ for (i = 0; i < SMC_MAX_PORTS; i++)
+ set_bit(i, smcibdev->ports_going_away);
+ }
+ spin_unlock(&smc_ib_devices.lock);
+
+ spin_lock(&smcd_dev_list.lock);
+ list_for_each_entry(smcd, &smcd_dev_list.list, list) {
+ smcd->going_away = 1;
+ }
+ spin_unlock(&smcd_dev_list.lock);
+}
+
/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_freeing_list);
+ struct smcd_dev *smcd;
+
+ smc_core_going_away();
spin_lock_bh(&smc_lgr_list.lock);
- if (!list_empty(&smc_lgr_list.list))
- list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
+ list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
spin_unlock_bh(&smc_lgr_list.lock);
+
+ spin_lock(&smcd_dev_list.lock);
+ list_for_each_entry(smcd, &smcd_dev_list.list, list)
+ list_splice_init(&smcd->lgr_list, &lgr_freeing_list);
+ spin_unlock(&smcd_dev_list.lock);
+
list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
list_del_init(&lgr->list);
if (!lgr->is_smcd) {
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c00ac61dc129..e6fd1ed42064 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -202,8 +202,11 @@ struct smc_link_group {
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
+ struct work_struct terminate_work; /* abnormal lgr termination */
u8 sync_err : 1; /* lgr no longer fits to peer */
u8 terminating : 1;/* lgr is terminating */
+ u8 freefast : 1; /* free worker scheduled fast */
+ u8 freeing : 1; /* lgr is being freed */
bool is_smcd; /* SMC-R or SMC-D */
union {
@@ -280,6 +283,12 @@ static inline struct smc_connection *smc_lgr_find_conn(
return res;
}
+static inline void smc_lgr_terminate_sched(struct smc_link_group *lgr)
+{
+ if (!lgr->terminating)
+ schedule_work(&lgr->terminate_work);
+}
+
struct smc_sock;
struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index d14ca4af6f94..af05daeb0538 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -242,8 +242,12 @@ static void smc_ib_port_event_work(struct work_struct *work)
for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
smc_ib_remember_port_attr(smcibdev, port_idx + 1);
clear_bit(port_idx, &smcibdev->port_event_mask);
- if (!smc_ib_port_active(smcibdev, port_idx + 1))
+ if (!smc_ib_port_active(smcibdev, port_idx + 1)) {
+ set_bit(port_idx, smcibdev->ports_going_away);
smc_port_terminate(smcibdev, port_idx + 1);
+ } else {
+ clear_bit(port_idx, smcibdev->ports_going_away);
+ }
}
}
@@ -259,8 +263,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
switch (ibevent->event) {
case IB_EVENT_DEVICE_FATAL:
/* terminate all ports on device */
- for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++)
+ for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) {
set_bit(port_idx, &smcibdev->port_event_mask);
+ set_bit(port_idx, smcibdev->ports_going_away);
+ }
schedule_work(&smcibdev->port_event_work);
break;
case IB_EVENT_PORT_ERR:
@@ -269,6 +275,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
port_idx = ibevent->element.port_num - 1;
if (port_idx < SMC_MAX_PORTS) {
set_bit(port_idx, &smcibdev->port_event_mask);
+ if (ibevent->event == IB_EVENT_PORT_ERR)
+ set_bit(port_idx, smcibdev->ports_going_away);
+ else if (ibevent->event == IB_EVENT_PORT_ACTIVE)
+ clear_bit(port_idx, smcibdev->ports_going_away);
schedule_work(&smcibdev->port_event_work);
}
break;
@@ -307,6 +317,7 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
port_idx = ibevent->element.qp->port - 1;
if (port_idx < SMC_MAX_PORTS) {
set_bit(port_idx, &smcibdev->port_event_mask);
+ set_bit(port_idx, smcibdev->ports_going_away);
schedule_work(&smcibdev->port_event_work);
}
break;
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index da60ab9e8d70..6a0069db6cae 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -47,6 +47,7 @@ struct smc_ib_device { /* ib-device infos for smc */
u8 initialized : 1; /* ib dev CQ, evthdl done */
struct work_struct port_event_work;
unsigned long port_event_mask;
+ DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS);
};
struct smc_buf_desc;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index e89e918b88e0..ee7340898cb4 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -286,7 +286,9 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
spin_lock_init(&smcd->lock);
+ spin_lock_init(&smcd->lgr_lock);
INIT_LIST_HEAD(&smcd->vlan);
+ INIT_LIST_HEAD(&smcd->lgr_list);
smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
WQ_MEM_RECLAIM, name);
if (!smcd->event_wq) {
@@ -313,6 +315,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
spin_lock(&smcd_dev_list.lock);
list_del(&smcd->list);
spin_unlock(&smcd_dev_list.lock);
+ smcd->going_away = 1;
flush_workqueue(smcd->event_wq);
destroy_workqueue(smcd->event_wq);
smc_smcd_terminate(smcd, 0, VLAN_VID_MASK);
@@ -342,6 +345,8 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
{
struct smc_ism_event_work *wrk;
+ if (smcd->going_away)
+ return;
/* copy event to event work queue, and let it be handled there */
wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
if (!wrk)
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 4fd60c522802..e1918ffaf125 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -475,7 +475,7 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
}
smc_llc_send_message(link, llc, sizeof(*llc));
- smc_lgr_schedule_free_work_fast(lgr);
+ smc_lgr_terminate_sched(lgr);
}
}
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 571e6d84da3b..82dedf052d86 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -779,6 +779,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
dev_put(ndev);
if (netdev == ndev &&
smc_ib_port_active(ibdev, i) &&
+ !test_bit(i - 1, ibdev->ports_going_away) &&
!smc_ib_determine_gid(ibdev, i, ini->vlan_id,
ini->ib_gid, NULL)) {
ini->ib_dev = ibdev;
@@ -818,6 +819,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
continue;
if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
smc_ib_port_active(ibdev, i) &&
+ !test_bit(i - 1, ibdev->ports_going_away) &&
!smc_ib_determine_gid(ibdev, i, ini->vlan_id,
ini->ib_gid, NULL)) {
ini->ib_dev = ibdev;
@@ -844,7 +846,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
spin_lock(&smcd_dev_list.lock);
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
- if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
+ if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
+ !ismdev->going_away) {
ini->ism_dev = ismdev;
break;
}
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 97e8369002d7..39d7b34d06d2 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -201,6 +201,8 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct smc_connection *conn = &smc->conn;
+ struct smc_cdc_conn_state_flags *cflags =
+ &conn->local_tx_ctrl.conn_state_flags;
struct sock *sk = &smc->sk;
int rc;
@@ -210,7 +212,9 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
add_wait_queue(sk_sleep(sk), &wait);
rc = sk_wait_event(sk, timeo,
sk->sk_err ||
+ cflags->peer_conn_abort ||
sk->sk_shutdown & RCV_SHUTDOWN ||
+ conn->killed ||
fcrit(conn),
&wait);
remove_wait_queue(sk_sleep(sk), &wait);
@@ -314,11 +318,13 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
if (read_done >= target || (pipe && read_done))
break;
+ if (conn->killed)
+ break;
+
if (smc_rx_recvmsg_data_available(smc))
goto copy;
- if (sk->sk_shutdown & RCV_SHUTDOWN ||
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) {
+ if (sk->sk_shutdown & RCV_SHUTDOWN) {
/* smc_cdc_msg_recv_action() could have run after
* above smc_rx_recvmsg_data_available()
*/
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 6c8f09c1ce51..824f096ee7de 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -86,6 +86,7 @@ static int smc_tx_wait(struct smc_sock *smc, int flags)
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
if (sk->sk_err ||
(sk->sk_shutdown & SEND_SHUTDOWN) ||
+ conn->killed ||
conn->local_tx_ctrl.conn_state_flags.peer_done_writing) {
rc = -EPIPE;
break;
@@ -155,7 +156,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
return -ENOTCONN;
if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
(smc->sk.sk_err == ECONNABORTED) ||
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
+ conn->killed)
return -EPIPE;
if (smc_cdc_rxed_any_close(conn))
return send_done ?: -ECONNRESET;
@@ -282,10 +283,8 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
peer_rmbe_offset;
rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
- if (rc) {
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ if (rc)
smc_lgr_terminate(lgr);
- }
return rc;
}
@@ -495,10 +494,11 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
if (smc->sk.sk_err == ECONNABORTED)
return sock_error(&smc->sk);
+ if (conn->killed)
+ return -EPIPE;
rc = 0;
- if (conn->alert_token_local) /* connection healthy */
- mod_delayed_work(system_wq, &conn->tx_work,
- SMC_TX_WORK_DELAY);
+ mod_delayed_work(system_wq, &conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
return rc;
}
@@ -547,6 +547,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
int rc;
+ if (conn->killed ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ return -EPIPE; /* connection being aborted */
if (conn->lgr->is_smcd)
rc = smcd_tx_sndbuf_nonempty(conn);
else
@@ -573,9 +576,7 @@ void smc_tx_work(struct work_struct *work)
int rc;
lock_sock(&smc->sk);
- if (smc->sk.sk_err ||
- !conn->alert_token_local ||
- conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ if (smc->sk.sk_err)
goto out;
rc = smc_tx_sndbuf_nonempty(conn);
@@ -608,8 +609,11 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
((to_confirm > conn->rmbe_update_limit) &&
((sender_free <= (conn->rmb_desc->len / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
+ if (conn->killed ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ return;
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
- conn->alert_token_local) { /* connection healthy */
+ !conn->killed) {
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
return;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 253aa75dc2b6..50743dc56c86 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -101,7 +101,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
clear_bit(i, link->wr_tx_mask);
}
/* terminate connections of this link group abnormally */
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
}
if (pnd_snd.handler)
pnd_snd.handler(&pnd_snd.priv, link, wc->status);
@@ -191,7 +191,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
/* timeout - terminate connections */
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
return -EPIPE;
}
if (idx == link->wr_tx_cnt)
@@ -247,7 +247,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
if (rc) {
smc_wr_tx_put_slot(link, priv);
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
}
return rc;
}
@@ -272,7 +272,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
SMC_WR_REG_MR_WAIT_TIME);
if (!rc) {
/* timeout - terminate connections */
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
return -EPIPE;
}
if (rc == -ERESTARTSYS)
@@ -373,7 +373,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
/* terminate connections of this link group
* abnormally
*/
- smc_lgr_terminate(smc_get_lgr(link));
+ smc_lgr_terminate_sched(smc_get_lgr(link));
break;
default:
smc_wr_rx_post(link); /* refill WR RX */
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index b83e16ade4d2..716b61a701a8 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -35,6 +35,21 @@ config TIPC_MEDIA_UDP
Saying Y here will enable support for running TIPC over IP/UDP
bool
default y
+config TIPC_CRYPTO
+ bool "TIPC encryption support"
+ depends on TIPC
+ select CRYPTO
+ select CRYPTO_AES
+ select CRYPTO_GCM
+ help
+ Saying Y here will enable support for TIPC encryption.
+ All TIPC messages will be encrypted/decrypted by using the currently most
+ advanced algorithm: AEAD AES-GCM (like IPSec or TLS) before leaving/
+ entering the TIPC stack.
+ Key setting from user-space is performed via netlink by a user program
+ (e.g. the iproute2 'tipc' tool).
+ bool
+ default y
config TIPC_DIAG
tristate "TIPC: socket monitoring interface"
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index c86aba0282af..11255e970dd4 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -16,6 +16,7 @@ CFLAGS_trace.o += -I$(src)
tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
tipc-$(CONFIG_SYSCTL) += sysctl.o
+tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o
obj-$(CONFIG_TIPC_DIAG) += diag.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 6ef1abdd525f..f41096a759fa 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -84,7 +84,7 @@ static struct tipc_bc_base *tipc_bc_base(struct net *net)
*/
int tipc_bcast_get_mtu(struct net *net)
{
- return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE;
+ return tipc_link_mss(tipc_bc_sndlink(net));
}
void tipc_bcast_disable_rcast(struct net *net)
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 0214aa1c4427..d7ec26bd739d 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -44,6 +44,7 @@
#include "netlink.h"
#include "udp_media.h"
#include "trace.h"
+#include "crypto.h"
#define MAX_ADDR_STR 60
@@ -315,6 +316,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
b->net_plane = bearer_id + 'A';
b->priority = prio;
test_and_set_bit_lock(0, &b->up);
+ refcount_set(&b->refcnt, 1);
res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
if (res) {
@@ -351,6 +353,17 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
return 0;
}
+bool tipc_bearer_hold(struct tipc_bearer *b)
+{
+ return (b && refcount_inc_not_zero(&b->refcnt));
+}
+
+void tipc_bearer_put(struct tipc_bearer *b)
+{
+ if (b && refcount_dec_and_test(&b->refcnt))
+ kfree_rcu(b, rcu);
+}
+
/**
* bearer_disable
*
@@ -369,7 +382,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
if (b->disc)
tipc_disc_delete(b->disc);
RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
- kfree_rcu(b, rcu);
+ tipc_bearer_put(b);
tipc_mon_delete(net, bearer_id);
}
@@ -504,10 +517,15 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
rcu_read_lock();
b = bearer_get(net, bearer_id);
- if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr))))
- b->media->send_msg(net, skb, b, dest);
- else
+ if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) {
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_xmit(net, &skb, b, dest, NULL);
+ if (skb)
+#endif
+ b->media->send_msg(net, skb, b, dest);
+ } else {
kfree_skb(skb);
+ }
rcu_read_unlock();
}
@@ -515,7 +533,8 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
*/
void tipc_bearer_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq,
- struct tipc_media_addr *dst)
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode)
{
struct tipc_bearer *b;
struct sk_buff *skb, *tmp;
@@ -529,10 +548,15 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
__skb_queue_purge(xmitq);
skb_queue_walk_safe(xmitq, skb, tmp) {
__skb_dequeue(xmitq);
- if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb))))
- b->media->send_msg(net, skb, b, dst);
- else
+ if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) {
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_xmit(net, &skb, b, dst, __dnode);
+ if (skb)
+#endif
+ b->media->send_msg(net, skb, b, dst);
+ } else {
kfree_skb(skb);
+ }
}
rcu_read_unlock();
}
@@ -543,6 +567,7 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq)
{
struct tipc_net *tn = tipc_net(net);
+ struct tipc_media_addr *dst;
int net_id = tn->net_id;
struct tipc_bearer *b;
struct sk_buff *skb, *tmp;
@@ -557,7 +582,12 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
msg_set_non_seq(hdr, 1);
msg_set_mc_netid(hdr, net_id);
__skb_dequeue(xmitq);
- b->media->send_msg(net, skb, b, &b->bcast_addr);
+ dst = &b->bcast_addr;
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_xmit(net, &skb, b, dst, NULL);
+ if (skb)
+#endif
+ b->media->send_msg(net, skb, b, dst);
}
rcu_read_unlock();
}
@@ -584,6 +614,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
if (likely(b && test_bit(0, &b->up) &&
(skb->pkt_type <= PACKET_MULTICAST))) {
skb_mark_not_on_list(skb);
+ TIPC_SKB_CB(skb)->flags = 0;
tipc_rcv(dev_net(b->pt.dev), skb, b);
rcu_read_unlock();
return NET_RX_SUCCESS;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index ea0f3c49cbed..d0c79cc6c0c2 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -165,6 +165,7 @@ struct tipc_bearer {
struct tipc_discoverer *disc;
char net_plane;
unsigned long up;
+ refcount_t refcnt;
};
struct tipc_bearer_names {
@@ -210,6 +211,8 @@ int tipc_media_set_window(const char *name, u32 new_value);
int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
struct nlattr *attrs[]);
+bool tipc_bearer_hold(struct tipc_bearer *b);
+void tipc_bearer_put(struct tipc_bearer *b);
void tipc_disable_l2_media(struct tipc_bearer *b);
int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
struct tipc_bearer *b, struct tipc_media_addr *dest);
@@ -229,7 +232,8 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
struct tipc_media_addr *dest);
void tipc_bearer_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq,
- struct tipc_media_addr *dst);
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode);
void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
struct sk_buff_head *xmitq);
void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 23cb379a93d6..fc01a13d7462 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -44,6 +44,7 @@
#include "socket.h"
#include "bcast.h"
#include "node.h"
+#include "crypto.h"
#include <linux/module.h>
@@ -68,6 +69,11 @@ static int __net_init tipc_init_net(struct net *net)
INIT_LIST_HEAD(&tn->node_list);
spin_lock_init(&tn->node_list_lock);
+#ifdef CONFIG_TIPC_CRYPTO
+ err = tipc_crypto_start(&tn->crypto_tx, net, NULL);
+ if (err)
+ goto out_crypto;
+#endif
err = tipc_sk_rht_init(net);
if (err)
goto out_sk_rht;
@@ -93,6 +99,11 @@ out_bclink:
out_nametbl:
tipc_sk_rht_destroy(net);
out_sk_rht:
+
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_stop(&tn->crypto_tx);
+out_crypto:
+#endif
return err;
}
@@ -103,8 +114,20 @@ static void __net_exit tipc_exit_net(struct net *net)
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_stop(&tipc_net(net)->crypto_tx);
+#endif
+}
+
+static void __net_exit tipc_pernet_pre_exit(struct net *net)
+{
+ tipc_node_pre_cleanup_net(net);
}
+static struct pernet_operations tipc_pernet_pre_exit_ops = {
+ .pre_exit = tipc_pernet_pre_exit,
+};
+
static struct pernet_operations tipc_net_ops = {
.init = tipc_init_net,
.exit = tipc_exit_net,
@@ -151,6 +174,10 @@ static int __init tipc_init(void)
if (err)
goto out_pernet_topsrv;
+ err = register_pernet_subsys(&tipc_pernet_pre_exit_ops);
+ if (err)
+ goto out_register_pernet_subsys;
+
err = tipc_bearer_setup();
if (err)
goto out_bearer;
@@ -158,6 +185,8 @@ static int __init tipc_init(void)
pr_info("Started in single node mode\n");
return 0;
out_bearer:
+ unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
+out_register_pernet_subsys:
unregister_pernet_device(&tipc_topsrv_net_ops);
out_pernet_topsrv:
tipc_socket_stop();
@@ -177,6 +206,7 @@ out_netlink:
static void __exit tipc_exit(void)
{
tipc_bearer_cleanup();
+ unregister_pernet_subsys(&tipc_pernet_pre_exit_ops);
unregister_pernet_device(&tipc_topsrv_net_ops);
tipc_socket_stop();
unregister_pernet_device(&tipc_net_ops);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 60d829581068..775848a5f27e 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -59,6 +59,7 @@
#include <net/netns/generic.h>
#include <linux/rhashtable.h>
#include <net/genetlink.h>
+#include <net/netns/hash.h>
struct tipc_node;
struct tipc_bearer;
@@ -67,6 +68,9 @@ struct tipc_link;
struct tipc_name_table;
struct tipc_topsrv;
struct tipc_monitor;
+#ifdef CONFIG_TIPC_CRYPTO
+struct tipc_crypto;
+#endif
#define TIPC_MOD_VER "2.0.0"
@@ -128,6 +132,11 @@ struct tipc_net {
/* Tracing of node internal messages */
struct packet_type loopback_pt;
+
+#ifdef CONFIG_TIPC_CRYPTO
+ /* TX crypto handler */
+ struct tipc_crypto *crypto_tx;
+#endif
};
static inline struct tipc_net *tipc_net(struct net *net)
@@ -185,6 +194,11 @@ static inline int in_range(u16 val, u16 min, u16 max)
return !less(val, min) && !more(val, max);
}
+static inline u32 tipc_net_hash_mixes(struct net *net, int tn_rand)
+{
+ return net_hash_mix(&init_net) ^ net_hash_mix(net) ^ tn_rand;
+}
+
#ifdef CONFIG_SYSCTL
int tipc_register_sysctl(void);
void tipc_unregister_sysctl(void);
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
new file mode 100644
index 000000000000..05f7ca76e8ce
--- /dev/null
+++ b/net/tipc/crypto.c
@@ -0,0 +1,1986 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * net/tipc/crypto.c: TIPC crypto for key handling & packet en/decryption
+ *
+ * Copyright (c) 2019, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <crypto/aead.h>
+#include <crypto/aes.h>
+#include "crypto.h"
+
+#define TIPC_TX_PROBE_LIM msecs_to_jiffies(1000) /* > 1s */
+#define TIPC_TX_LASTING_LIM msecs_to_jiffies(120000) /* 2 mins */
+#define TIPC_RX_ACTIVE_LIM msecs_to_jiffies(3000) /* 3s */
+#define TIPC_RX_PASSIVE_LIM msecs_to_jiffies(180000) /* 3 mins */
+#define TIPC_MAX_TFMS_DEF 10
+#define TIPC_MAX_TFMS_LIM 1000
+
+/**
+ * TIPC Key ids
+ */
+enum {
+ KEY_UNUSED = 0,
+ KEY_MIN,
+ KEY_1 = KEY_MIN,
+ KEY_2,
+ KEY_3,
+ KEY_MAX = KEY_3,
+};
+
+/**
+ * TIPC Crypto statistics
+ */
+enum {
+ STAT_OK,
+ STAT_NOK,
+ STAT_ASYNC,
+ STAT_ASYNC_OK,
+ STAT_ASYNC_NOK,
+ STAT_BADKEYS, /* tx only */
+ STAT_BADMSGS = STAT_BADKEYS, /* rx only */
+ STAT_NOKEYS,
+ STAT_SWITCHES,
+
+ MAX_STATS,
+};
+
+/* TIPC crypto statistics' header */
+static const char *hstats[MAX_STATS] = {"ok", "nok", "async", "async_ok",
+ "async_nok", "badmsgs", "nokeys",
+ "switches"};
+
+/* Max TFMs number per key */
+int sysctl_tipc_max_tfms __read_mostly = TIPC_MAX_TFMS_DEF;
+
+/**
+ * struct tipc_key - TIPC keys' status indicator
+ *
+ * 7 6 5 4 3 2 1 0
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * key: | (reserved)|passive idx| active idx|pending idx|
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ */
+struct tipc_key {
+#define KEY_BITS (2)
+#define KEY_MASK ((1 << KEY_BITS) - 1)
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 pending:2,
+ active:2,
+ passive:2, /* rx only */
+ reserved:2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:2,
+ passive:2, /* rx only */
+ active:2,
+ pending:2;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ } __packed;
+ u8 keys;
+ };
+};
+
+/**
+ * struct tipc_tfm - TIPC TFM structure to form a list of TFMs
+ */
+struct tipc_tfm {
+ struct crypto_aead *tfm;
+ struct list_head list;
+};
+
+/**
+ * struct tipc_aead - TIPC AEAD key structure
+ * @tfm_entry: per-cpu pointer to one entry in TFM list
+ * @crypto: TIPC crypto owns this key
+ * @cloned: reference to the source key in case cloning
+ * @users: the number of the key users (TX/RX)
+ * @salt: the key's SALT value
+ * @authsize: authentication tag size (max = 16)
+ * @mode: crypto mode is applied to the key
+ * @hint[]: a hint for user key
+ * @rcu: struct rcu_head
+ * @seqno: the key seqno (cluster scope)
+ * @refcnt: the key reference counter
+ */
+struct tipc_aead {
+#define TIPC_AEAD_HINT_LEN (5)
+ struct tipc_tfm * __percpu *tfm_entry;
+ struct tipc_crypto *crypto;
+ struct tipc_aead *cloned;
+ atomic_t users;
+ u32 salt;
+ u8 authsize;
+ u8 mode;
+ char hint[TIPC_AEAD_HINT_LEN + 1];
+ struct rcu_head rcu;
+
+ atomic64_t seqno ____cacheline_aligned;
+ refcount_t refcnt ____cacheline_aligned;
+
+} ____cacheline_aligned;
+
+/**
+ * struct tipc_crypto_stats - TIPC Crypto statistics
+ */
+struct tipc_crypto_stats {
+ unsigned int stat[MAX_STATS];
+};
+
+/**
+ * struct tipc_crypto - TIPC TX/RX crypto structure
+ * @net: struct net
+ * @node: TIPC node (RX)
+ * @aead: array of pointers to AEAD keys for encryption/decryption
+ * @peer_rx_active: replicated peer RX active key index
+ * @key: the key states
+ * @working: the crypto is working or not
+ * @stats: the crypto statistics
+ * @sndnxt: the per-peer sndnxt (TX)
+ * @timer1: general timer 1 (jiffies)
+ * @timer2: general timer 1 (jiffies)
+ * @lock: tipc_key lock
+ */
+struct tipc_crypto {
+ struct net *net;
+ struct tipc_node *node;
+ struct tipc_aead __rcu *aead[KEY_MAX + 1]; /* key[0] is UNUSED */
+ atomic_t peer_rx_active;
+ struct tipc_key key;
+ u8 working:1;
+ struct tipc_crypto_stats __percpu *stats;
+
+ atomic64_t sndnxt ____cacheline_aligned;
+ unsigned long timer1;
+ unsigned long timer2;
+ spinlock_t lock; /* crypto lock */
+
+} ____cacheline_aligned;
+
+/* struct tipc_crypto_tx_ctx - TX context for callbacks */
+struct tipc_crypto_tx_ctx {
+ struct tipc_aead *aead;
+ struct tipc_bearer *bearer;
+ struct tipc_media_addr dst;
+};
+
+/* struct tipc_crypto_rx_ctx - RX context for callbacks */
+struct tipc_crypto_rx_ctx {
+ struct tipc_aead *aead;
+ struct tipc_bearer *bearer;
+};
+
+static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead);
+static inline void tipc_aead_put(struct tipc_aead *aead);
+static void tipc_aead_free(struct rcu_head *rp);
+static int tipc_aead_users(struct tipc_aead __rcu *aead);
+static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim);
+static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim);
+static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val);
+static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead);
+static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey,
+ u8 mode);
+static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src);
+static void *tipc_aead_mem_alloc(struct crypto_aead *tfm,
+ unsigned int crypto_ctx_size,
+ u8 **iv, struct aead_request **req,
+ struct scatterlist **sg, int nsg);
+static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode);
+static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err);
+static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
+ struct sk_buff *skb, struct tipc_bearer *b);
+static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err);
+static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr);
+static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead,
+ u8 tx_key, struct sk_buff *skb,
+ struct tipc_crypto *__rx);
+static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
+ u8 new_passive,
+ u8 new_active,
+ u8 new_pending);
+static int tipc_crypto_key_attach(struct tipc_crypto *c,
+ struct tipc_aead *aead, u8 pos);
+static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending);
+static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
+ struct tipc_crypto *rx,
+ struct sk_buff *skb);
+static void tipc_crypto_key_synch(struct tipc_crypto *rx, u8 new_rx_active,
+ struct tipc_msg *hdr);
+static int tipc_crypto_key_revoke(struct net *net, u8 tx_key);
+static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
+ struct tipc_bearer *b,
+ struct sk_buff **skb, int err);
+static void tipc_crypto_do_cmd(struct net *net, int cmd);
+static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf);
+#ifdef TIPC_CRYPTO_DEBUG
+static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
+ char *buf);
+#endif
+
+#define key_next(cur) ((cur) % KEY_MAX + 1)
+
+#define tipc_aead_rcu_ptr(rcu_ptr, lock) \
+ rcu_dereference_protected((rcu_ptr), lockdep_is_held(lock))
+
+#define tipc_aead_rcu_swap(rcu_ptr, ptr, lock) \
+ rcu_swap_protected((rcu_ptr), (ptr), lockdep_is_held(lock))
+
+#define tipc_aead_rcu_replace(rcu_ptr, ptr, lock) \
+do { \
+ typeof(rcu_ptr) __tmp = rcu_dereference_protected((rcu_ptr), \
+ lockdep_is_held(lock)); \
+ rcu_assign_pointer((rcu_ptr), (ptr)); \
+ tipc_aead_put(__tmp); \
+} while (0)
+
+#define tipc_crypto_key_detach(rcu_ptr, lock) \
+ tipc_aead_rcu_replace((rcu_ptr), NULL, lock)
+
+/**
+ * tipc_aead_key_validate - Validate a AEAD user key
+ */
+int tipc_aead_key_validate(struct tipc_aead_key *ukey)
+{
+ int keylen;
+
+ /* Check if algorithm exists */
+ if (unlikely(!crypto_has_alg(ukey->alg_name, 0, 0))) {
+ pr_info("Not found cipher: \"%s\"!\n", ukey->alg_name);
+ return -ENODEV;
+ }
+
+ /* Currently, we only support the "gcm(aes)" cipher algorithm */
+ if (strcmp(ukey->alg_name, "gcm(aes)"))
+ return -ENOTSUPP;
+
+ /* Check if key size is correct */
+ keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE;
+ if (unlikely(keylen != TIPC_AES_GCM_KEY_SIZE_128 &&
+ keylen != TIPC_AES_GCM_KEY_SIZE_192 &&
+ keylen != TIPC_AES_GCM_KEY_SIZE_256))
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead)
+{
+ struct tipc_aead *tmp;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(aead);
+ if (unlikely(!tmp || !refcount_inc_not_zero(&tmp->refcnt)))
+ tmp = NULL;
+ rcu_read_unlock();
+
+ return tmp;
+}
+
+static inline void tipc_aead_put(struct tipc_aead *aead)
+{
+ if (aead && refcount_dec_and_test(&aead->refcnt))
+ call_rcu(&aead->rcu, tipc_aead_free);
+}
+
+/**
+ * tipc_aead_free - Release AEAD key incl. all the TFMs in the list
+ * @rp: rcu head pointer
+ */
+static void tipc_aead_free(struct rcu_head *rp)
+{
+ struct tipc_aead *aead = container_of(rp, struct tipc_aead, rcu);
+ struct tipc_tfm *tfm_entry, *head, *tmp;
+
+ if (aead->cloned) {
+ tipc_aead_put(aead->cloned);
+ } else {
+ head = *this_cpu_ptr(aead->tfm_entry);
+ list_for_each_entry_safe(tfm_entry, tmp, &head->list, list) {
+ crypto_free_aead(tfm_entry->tfm);
+ list_del(&tfm_entry->list);
+ kfree(tfm_entry);
+ }
+ /* Free the head */
+ crypto_free_aead(head->tfm);
+ list_del(&head->list);
+ kfree(head);
+ }
+ free_percpu(aead->tfm_entry);
+ kfree(aead);
+}
+
+static int tipc_aead_users(struct tipc_aead __rcu *aead)
+{
+ struct tipc_aead *tmp;
+ int users = 0;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(aead);
+ if (tmp)
+ users = atomic_read(&tmp->users);
+ rcu_read_unlock();
+
+ return users;
+}
+
+static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim)
+{
+ struct tipc_aead *tmp;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(aead);
+ if (tmp)
+ atomic_add_unless(&tmp->users, 1, lim);
+ rcu_read_unlock();
+}
+
+static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim)
+{
+ struct tipc_aead *tmp;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(aead);
+ if (tmp)
+ atomic_add_unless(&rcu_dereference(aead)->users, -1, lim);
+ rcu_read_unlock();
+}
+
+static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val)
+{
+ struct tipc_aead *tmp;
+ int cur;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(aead);
+ if (tmp) {
+ do {
+ cur = atomic_read(&tmp->users);
+ if (cur == val)
+ break;
+ } while (atomic_cmpxchg(&tmp->users, cur, val) != cur);
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * tipc_aead_tfm_next - Move TFM entry to the next one in list and return it
+ */
+static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead)
+{
+ struct tipc_tfm **tfm_entry = this_cpu_ptr(aead->tfm_entry);
+
+ *tfm_entry = list_next_entry(*tfm_entry, list);
+ return (*tfm_entry)->tfm;
+}
+
+/**
+ * tipc_aead_init - Initiate TIPC AEAD
+ * @aead: returned new TIPC AEAD key handle pointer
+ * @ukey: pointer to user key data
+ * @mode: the key mode
+ *
+ * Allocate a (list of) new cipher transformation (TFM) with the specific user
+ * key data if valid. The number of the allocated TFMs can be set via the sysfs
+ * "net/tipc/max_tfms" first.
+ * Also, all the other AEAD data are also initialized.
+ *
+ * Return: 0 if the initiation is successful, otherwise: < 0
+ */
+static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey,
+ u8 mode)
+{
+ struct tipc_tfm *tfm_entry, *head;
+ struct crypto_aead *tfm;
+ struct tipc_aead *tmp;
+ int keylen, err, cpu;
+ int tfm_cnt = 0;
+
+ if (unlikely(*aead))
+ return -EEXIST;
+
+ /* Allocate a new AEAD */
+ tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
+ if (unlikely(!tmp))
+ return -ENOMEM;
+
+ /* The key consists of two parts: [AES-KEY][SALT] */
+ keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE;
+
+ /* Allocate per-cpu TFM entry pointer */
+ tmp->tfm_entry = alloc_percpu(struct tipc_tfm *);
+ if (!tmp->tfm_entry) {
+ kzfree(tmp);
+ return -ENOMEM;
+ }
+
+ /* Make a list of TFMs with the user key data */
+ do {
+ tfm = crypto_alloc_aead(ukey->alg_name, 0, 0);
+ if (IS_ERR(tfm)) {
+ err = PTR_ERR(tfm);
+ break;
+ }
+
+ if (unlikely(!tfm_cnt &&
+ crypto_aead_ivsize(tfm) != TIPC_AES_GCM_IV_SIZE)) {
+ crypto_free_aead(tfm);
+ err = -ENOTSUPP;
+ break;
+ }
+
+ err |= crypto_aead_setauthsize(tfm, TIPC_AES_GCM_TAG_SIZE);
+ err |= crypto_aead_setkey(tfm, ukey->key, keylen);
+ if (unlikely(err)) {
+ crypto_free_aead(tfm);
+ break;
+ }
+
+ tfm_entry = kmalloc(sizeof(*tfm_entry), GFP_KERNEL);
+ if (unlikely(!tfm_entry)) {
+ crypto_free_aead(tfm);
+ err = -ENOMEM;
+ break;
+ }
+ INIT_LIST_HEAD(&tfm_entry->list);
+ tfm_entry->tfm = tfm;
+
+ /* First entry? */
+ if (!tfm_cnt) {
+ head = tfm_entry;
+ for_each_possible_cpu(cpu) {
+ *per_cpu_ptr(tmp->tfm_entry, cpu) = head;
+ }
+ } else {
+ list_add_tail(&tfm_entry->list, &head->list);
+ }
+
+ } while (++tfm_cnt < sysctl_tipc_max_tfms);
+
+ /* Not any TFM is allocated? */
+ if (!tfm_cnt) {
+ free_percpu(tmp->tfm_entry);
+ kzfree(tmp);
+ return err;
+ }
+
+ /* Copy some chars from the user key as a hint */
+ memcpy(tmp->hint, ukey->key, TIPC_AEAD_HINT_LEN);
+ tmp->hint[TIPC_AEAD_HINT_LEN] = '\0';
+
+ /* Initialize the other data */
+ tmp->mode = mode;
+ tmp->cloned = NULL;
+ tmp->authsize = TIPC_AES_GCM_TAG_SIZE;
+ memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE);
+ atomic_set(&tmp->users, 0);
+ atomic64_set(&tmp->seqno, 0);
+ refcount_set(&tmp->refcnt, 1);
+
+ *aead = tmp;
+ return 0;
+}
+
+/**
+ * tipc_aead_clone - Clone a TIPC AEAD key
+ * @dst: dest key for the cloning
+ * @src: source key to clone from
+ *
+ * Make a "copy" of the source AEAD key data to the dest, the TFMs list is
+ * common for the keys.
+ * A reference to the source is hold in the "cloned" pointer for the later
+ * freeing purposes.
+ *
+ * Note: this must be done in cluster-key mode only!
+ * Return: 0 in case of success, otherwise < 0
+ */
+static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src)
+{
+ struct tipc_aead *aead;
+ int cpu;
+
+ if (!src)
+ return -ENOKEY;
+
+ if (src->mode != CLUSTER_KEY)
+ return -EINVAL;
+
+ if (unlikely(*dst))
+ return -EEXIST;
+
+ aead = kzalloc(sizeof(*aead), GFP_ATOMIC);
+ if (unlikely(!aead))
+ return -ENOMEM;
+
+ aead->tfm_entry = alloc_percpu_gfp(struct tipc_tfm *, GFP_ATOMIC);
+ if (unlikely(!aead->tfm_entry)) {
+ kzfree(aead);
+ return -ENOMEM;
+ }
+
+ for_each_possible_cpu(cpu) {
+ *per_cpu_ptr(aead->tfm_entry, cpu) =
+ *per_cpu_ptr(src->tfm_entry, cpu);
+ }
+
+ memcpy(aead->hint, src->hint, sizeof(src->hint));
+ aead->mode = src->mode;
+ aead->salt = src->salt;
+ aead->authsize = src->authsize;
+ atomic_set(&aead->users, 0);
+ atomic64_set(&aead->seqno, 0);
+ refcount_set(&aead->refcnt, 1);
+
+ WARN_ON(!refcount_inc_not_zero(&src->refcnt));
+ aead->cloned = src;
+
+ *dst = aead;
+ return 0;
+}
+
+/**
+ * tipc_aead_mem_alloc - Allocate memory for AEAD request operations
+ * @tfm: cipher handle to be registered with the request
+ * @crypto_ctx_size: size of crypto context for callback
+ * @iv: returned pointer to IV data
+ * @req: returned pointer to AEAD request data
+ * @sg: returned pointer to SG lists
+ * @nsg: number of SG lists to be allocated
+ *
+ * Allocate memory to store the crypto context data, AEAD request, IV and SG
+ * lists, the memory layout is as follows:
+ * crypto_ctx || iv || aead_req || sg[]
+ *
+ * Return: the pointer to the memory areas in case of success, otherwise NULL
+ */
+static void *tipc_aead_mem_alloc(struct crypto_aead *tfm,
+ unsigned int crypto_ctx_size,
+ u8 **iv, struct aead_request **req,
+ struct scatterlist **sg, int nsg)
+{
+ unsigned int iv_size, req_size;
+ unsigned int len;
+ u8 *mem;
+
+ iv_size = crypto_aead_ivsize(tfm);
+ req_size = sizeof(**req) + crypto_aead_reqsize(tfm);
+
+ len = crypto_ctx_size;
+ len += iv_size;
+ len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1);
+ len = ALIGN(len, crypto_tfm_ctx_alignment());
+ len += req_size;
+ len = ALIGN(len, __alignof__(struct scatterlist));
+ len += nsg * sizeof(**sg);
+
+ mem = kmalloc(len, GFP_ATOMIC);
+ if (!mem)
+ return NULL;
+
+ *iv = (u8 *)PTR_ALIGN(mem + crypto_ctx_size,
+ crypto_aead_alignmask(tfm) + 1);
+ *req = (struct aead_request *)PTR_ALIGN(*iv + iv_size,
+ crypto_tfm_ctx_alignment());
+ *sg = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size,
+ __alignof__(struct scatterlist));
+
+ return (void *)mem;
+}
+
+/**
+ * tipc_aead_encrypt - Encrypt a message
+ * @aead: TIPC AEAD key for the message encryption
+ * @skb: the input/output skb
+ * @b: TIPC bearer where the message will be delivered after the encryption
+ * @dst: the destination media address
+ * @__dnode: TIPC dest node if "known"
+ *
+ * Return:
+ * 0 : if the encryption has completed
+ * -EINPROGRESS/-EBUSY : if a callback will be performed
+ * < 0 : the encryption has failed
+ */
+static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
+ struct tipc_bearer *b,
+ struct tipc_media_addr *dst,
+ struct tipc_node *__dnode)
+{
+ struct crypto_aead *tfm = tipc_aead_tfm_next(aead);
+ struct tipc_crypto_tx_ctx *tx_ctx;
+ struct aead_request *req;
+ struct sk_buff *trailer;
+ struct scatterlist *sg;
+ struct tipc_ehdr *ehdr;
+ int ehsz, len, tailen, nsg, rc;
+ void *ctx;
+ u32 salt;
+ u8 *iv;
+
+ /* Make sure message len at least 4-byte aligned */
+ len = ALIGN(skb->len, 4);
+ tailen = len - skb->len + aead->authsize;
+
+ /* Expand skb tail for authentication tag:
+ * As for simplicity, we'd have made sure skb having enough tailroom
+ * for authentication tag @skb allocation. Even when skb is nonlinear
+ * but there is no frag_list, it should be still fine!
+ * Otherwise, we must cow it to be a writable buffer with the tailroom.
+ */
+#ifdef TIPC_CRYPTO_DEBUG
+ SKB_LINEAR_ASSERT(skb);
+ if (tailen > skb_tailroom(skb)) {
+ pr_warn("TX: skb tailroom is not enough: %d, requires: %d\n",
+ skb_tailroom(skb), tailen);
+ }
+#endif
+
+ if (unlikely(!skb_cloned(skb) && tailen <= skb_tailroom(skb))) {
+ nsg = 1;
+ trailer = skb;
+ } else {
+ /* TODO: We could avoid skb_cow_data() if skb has no frag_list
+ * e.g. by skb_fill_page_desc() to add another page to the skb
+ * with the wanted tailen... However, page skbs look not often,
+ * so take it easy now!
+ * Cloned skbs e.g. from link_xmit() seems no choice though :(
+ */
+ nsg = skb_cow_data(skb, tailen, &trailer);
+ if (unlikely(nsg < 0)) {
+ pr_err("TX: skb_cow_data() returned %d\n", nsg);
+ return nsg;
+ }
+ }
+
+ pskb_put(skb, trailer, tailen);
+
+ /* Allocate memory for the AEAD operation */
+ ctx = tipc_aead_mem_alloc(tfm, sizeof(*tx_ctx), &iv, &req, &sg, nsg);
+ if (unlikely(!ctx))
+ return -ENOMEM;
+ TIPC_SKB_CB(skb)->crypto_ctx = ctx;
+
+ /* Map skb to the sg lists */
+ sg_init_table(sg, nsg);
+ rc = skb_to_sgvec(skb, sg, 0, skb->len);
+ if (unlikely(rc < 0)) {
+ pr_err("TX: skb_to_sgvec() returned %d, nsg %d!\n", rc, nsg);
+ goto exit;
+ }
+
+ /* Prepare IV: [SALT (4 octets)][SEQNO (8 octets)]
+ * In case we're in cluster-key mode, SALT is varied by xor-ing with
+ * the source address (or w0 of id), otherwise with the dest address
+ * if dest is known.
+ */
+ ehdr = (struct tipc_ehdr *)skb->data;
+ salt = aead->salt;
+ if (aead->mode == CLUSTER_KEY)
+ salt ^= ehdr->addr; /* __be32 */
+ else if (__dnode)
+ salt ^= tipc_node_get_addr(__dnode);
+ memcpy(iv, &salt, 4);
+ memcpy(iv + 4, (u8 *)&ehdr->seqno, 8);
+
+ /* Prepare request */
+ ehsz = tipc_ehdr_size(ehdr);
+ aead_request_set_tfm(req, tfm);
+ aead_request_set_ad(req, ehsz);
+ aead_request_set_crypt(req, sg, sg, len - ehsz, iv);
+
+ /* Set callback function & data */
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ tipc_aead_encrypt_done, skb);
+ tx_ctx = (struct tipc_crypto_tx_ctx *)ctx;
+ tx_ctx->aead = aead;
+ tx_ctx->bearer = b;
+ memcpy(&tx_ctx->dst, dst, sizeof(*dst));
+
+ /* Hold bearer */
+ if (unlikely(!tipc_bearer_hold(b))) {
+ rc = -ENODEV;
+ goto exit;
+ }
+
+ /* Now, do encrypt */
+ rc = crypto_aead_encrypt(req);
+ if (rc == -EINPROGRESS || rc == -EBUSY)
+ return rc;
+
+ tipc_bearer_put(b);
+
+exit:
+ kfree(ctx);
+ TIPC_SKB_CB(skb)->crypto_ctx = NULL;
+ return rc;
+}
+
+static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+ struct tipc_crypto_tx_ctx *tx_ctx = TIPC_SKB_CB(skb)->crypto_ctx;
+ struct tipc_bearer *b = tx_ctx->bearer;
+ struct tipc_aead *aead = tx_ctx->aead;
+ struct tipc_crypto *tx = aead->crypto;
+ struct net *net = tx->net;
+
+ switch (err) {
+ case 0:
+ this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]);
+ if (likely(test_bit(0, &b->up)))
+ b->media->send_msg(net, skb, b, &tx_ctx->dst);
+ else
+ kfree_skb(skb);
+ break;
+ case -EINPROGRESS:
+ return;
+ default:
+ this_cpu_inc(tx->stats->stat[STAT_ASYNC_NOK]);
+ kfree_skb(skb);
+ break;
+ }
+
+ kfree(tx_ctx);
+ tipc_bearer_put(b);
+ tipc_aead_put(aead);
+}
+
+/**
+ * tipc_aead_decrypt - Decrypt an encrypted message
+ * @net: struct net
+ * @aead: TIPC AEAD for the message decryption
+ * @skb: the input/output skb
+ * @b: TIPC bearer where the message has been received
+ *
+ * Return:
+ * 0 : if the decryption has completed
+ * -EINPROGRESS/-EBUSY : if a callback will be performed
+ * < 0 : the decryption has failed
+ */
+static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
+ struct sk_buff *skb, struct tipc_bearer *b)
+{
+ struct tipc_crypto_rx_ctx *rx_ctx;
+ struct aead_request *req;
+ struct crypto_aead *tfm;
+ struct sk_buff *unused;
+ struct scatterlist *sg;
+ struct tipc_ehdr *ehdr;
+ int ehsz, nsg, rc;
+ void *ctx;
+ u32 salt;
+ u8 *iv;
+
+ if (unlikely(!aead))
+ return -ENOKEY;
+
+ /* Cow skb data if needed */
+ if (likely(!skb_cloned(skb) &&
+ (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) {
+ nsg = 1 + skb_shinfo(skb)->nr_frags;
+ } else {
+ nsg = skb_cow_data(skb, 0, &unused);
+ if (unlikely(nsg < 0)) {
+ pr_err("RX: skb_cow_data() returned %d\n", nsg);
+ return nsg;
+ }
+ }
+
+ /* Allocate memory for the AEAD operation */
+ tfm = tipc_aead_tfm_next(aead);
+ ctx = tipc_aead_mem_alloc(tfm, sizeof(*rx_ctx), &iv, &req, &sg, nsg);
+ if (unlikely(!ctx))
+ return -ENOMEM;
+ TIPC_SKB_CB(skb)->crypto_ctx = ctx;
+
+ /* Map skb to the sg lists */
+ sg_init_table(sg, nsg);
+ rc = skb_to_sgvec(skb, sg, 0, skb->len);
+ if (unlikely(rc < 0)) {
+ pr_err("RX: skb_to_sgvec() returned %d, nsg %d\n", rc, nsg);
+ goto exit;
+ }
+
+ /* Reconstruct IV: */
+ ehdr = (struct tipc_ehdr *)skb->data;
+ salt = aead->salt;
+ if (aead->mode == CLUSTER_KEY)
+ salt ^= ehdr->addr; /* __be32 */
+ else if (ehdr->destined)
+ salt ^= tipc_own_addr(net);
+ memcpy(iv, &salt, 4);
+ memcpy(iv + 4, (u8 *)&ehdr->seqno, 8);
+
+ /* Prepare request */
+ ehsz = tipc_ehdr_size(ehdr);
+ aead_request_set_tfm(req, tfm);
+ aead_request_set_ad(req, ehsz);
+ aead_request_set_crypt(req, sg, sg, skb->len - ehsz, iv);
+
+ /* Set callback function & data */
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ tipc_aead_decrypt_done, skb);
+ rx_ctx = (struct tipc_crypto_rx_ctx *)ctx;
+ rx_ctx->aead = aead;
+ rx_ctx->bearer = b;
+
+ /* Hold bearer */
+ if (unlikely(!tipc_bearer_hold(b))) {
+ rc = -ENODEV;
+ goto exit;
+ }
+
+ /* Now, do decrypt */
+ rc = crypto_aead_decrypt(req);
+ if (rc == -EINPROGRESS || rc == -EBUSY)
+ return rc;
+
+ tipc_bearer_put(b);
+
+exit:
+ kfree(ctx);
+ TIPC_SKB_CB(skb)->crypto_ctx = NULL;
+ return rc;
+}
+
+static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err)
+{
+ struct sk_buff *skb = base->data;
+ struct tipc_crypto_rx_ctx *rx_ctx = TIPC_SKB_CB(skb)->crypto_ctx;
+ struct tipc_bearer *b = rx_ctx->bearer;
+ struct tipc_aead *aead = rx_ctx->aead;
+ struct tipc_crypto_stats __percpu *stats = aead->crypto->stats;
+ struct net *net = aead->crypto->net;
+
+ switch (err) {
+ case 0:
+ this_cpu_inc(stats->stat[STAT_ASYNC_OK]);
+ break;
+ case -EINPROGRESS:
+ return;
+ default:
+ this_cpu_inc(stats->stat[STAT_ASYNC_NOK]);
+ break;
+ }
+
+ kfree(rx_ctx);
+ tipc_crypto_rcv_complete(net, aead, b, &skb, err);
+ if (likely(skb)) {
+ if (likely(test_bit(0, &b->up)))
+ tipc_rcv(net, skb, b);
+ else
+ kfree_skb(skb);
+ }
+
+ tipc_bearer_put(b);
+}
+
+static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr)
+{
+ return (ehdr->user != LINK_CONFIG) ? EHDR_SIZE : EHDR_CFG_SIZE;
+}
+
+/**
+ * tipc_ehdr_validate - Validate an encryption message
+ * @skb: the message buffer
+ *
+ * Returns "true" if this is a valid encryption message, otherwise "false"
+ */
+bool tipc_ehdr_validate(struct sk_buff *skb)
+{
+ struct tipc_ehdr *ehdr;
+ int ehsz;
+
+ if (unlikely(!pskb_may_pull(skb, EHDR_MIN_SIZE)))
+ return false;
+
+ ehdr = (struct tipc_ehdr *)skb->data;
+ if (unlikely(ehdr->version != TIPC_EVERSION))
+ return false;
+ ehsz = tipc_ehdr_size(ehdr);
+ if (unlikely(!pskb_may_pull(skb, ehsz)))
+ return false;
+ if (unlikely(skb->len <= ehsz + TIPC_AES_GCM_TAG_SIZE))
+ return false;
+ if (unlikely(!ehdr->tx_key))
+ return false;
+
+ return true;
+}
+
+/**
+ * tipc_ehdr_build - Build TIPC encryption message header
+ * @net: struct net
+ * @aead: TX AEAD key to be used for the message encryption
+ * @tx_key: key id used for the message encryption
+ * @skb: input/output message skb
+ * @__rx: RX crypto handle if dest is "known"
+ *
+ * Return: the header size if the building is successful, otherwise < 0
+ */
+static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead,
+ u8 tx_key, struct sk_buff *skb,
+ struct tipc_crypto *__rx)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_ehdr *ehdr;
+ u32 user = msg_user(hdr);
+ u64 seqno;
+ int ehsz;
+
+ /* Make room for encryption header */
+ ehsz = (user != LINK_CONFIG) ? EHDR_SIZE : EHDR_CFG_SIZE;
+ WARN_ON(skb_headroom(skb) < ehsz);
+ ehdr = (struct tipc_ehdr *)skb_push(skb, ehsz);
+
+ /* Obtain a seqno first:
+ * Use the key seqno (= cluster wise) if dest is unknown or we're in
+ * cluster key mode, otherwise it's better for a per-peer seqno!
+ */
+ if (!__rx || aead->mode == CLUSTER_KEY)
+ seqno = atomic64_inc_return(&aead->seqno);
+ else
+ seqno = atomic64_inc_return(&__rx->sndnxt);
+
+ /* Revoke the key if seqno is wrapped around */
+ if (unlikely(!seqno))
+ return tipc_crypto_key_revoke(net, tx_key);
+
+ /* Word 1-2 */
+ ehdr->seqno = cpu_to_be64(seqno);
+
+ /* Words 0, 3- */
+ ehdr->version = TIPC_EVERSION;
+ ehdr->user = 0;
+ ehdr->keepalive = 0;
+ ehdr->tx_key = tx_key;
+ ehdr->destined = (__rx) ? 1 : 0;
+ ehdr->rx_key_active = (__rx) ? __rx->key.active : 0;
+ ehdr->reserved_1 = 0;
+ ehdr->reserved_2 = 0;
+
+ switch (user) {
+ case LINK_CONFIG:
+ ehdr->user = LINK_CONFIG;
+ memcpy(ehdr->id, tipc_own_id(net), NODE_ID_LEN);
+ break;
+ default:
+ if (user == LINK_PROTOCOL && msg_type(hdr) == STATE_MSG) {
+ ehdr->user = LINK_PROTOCOL;
+ ehdr->keepalive = msg_is_keepalive(hdr);
+ }
+ ehdr->addr = hdr->hdr[3];
+ break;
+ }
+
+ return ehsz;
+}
+
+static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
+ u8 new_passive,
+ u8 new_active,
+ u8 new_pending)
+{
+#ifdef TIPC_CRYPTO_DEBUG
+ struct tipc_key old = c->key;
+ char buf[32];
+#endif
+
+ c->key.keys = ((new_passive & KEY_MASK) << (KEY_BITS * 2)) |
+ ((new_active & KEY_MASK) << (KEY_BITS)) |
+ ((new_pending & KEY_MASK));
+
+#ifdef TIPC_CRYPTO_DEBUG
+ pr_info("%s(%s): key changing %s ::%pS\n",
+ (c->node) ? "RX" : "TX",
+ (c->node) ? tipc_node_get_id_str(c->node) :
+ tipc_own_id_string(c->net),
+ tipc_key_change_dump(old, c->key, buf),
+ __builtin_return_address(0));
+#endif
+}
+
+/**
+ * tipc_crypto_key_init - Initiate a new user / AEAD key
+ * @c: TIPC crypto to which new key is attached
+ * @ukey: the user key
+ * @mode: the key mode (CLUSTER_KEY or PER_NODE_KEY)
+ *
+ * A new TIPC AEAD key will be allocated and initiated with the specified user
+ * key, then attached to the TIPC crypto.
+ *
+ * Return: new key id in case of success, otherwise: < 0
+ */
+int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
+ u8 mode)
+{
+ struct tipc_aead *aead = NULL;
+ int rc = 0;
+
+ /* Initiate with the new user key */
+ rc = tipc_aead_init(&aead, ukey, mode);
+
+ /* Attach it to the crypto */
+ if (likely(!rc)) {
+ rc = tipc_crypto_key_attach(c, aead, 0);
+ if (rc < 0)
+ tipc_aead_free(&aead->rcu);
+ }
+
+ pr_info("%s(%s): key initiating, rc %d!\n",
+ (c->node) ? "RX" : "TX",
+ (c->node) ? tipc_node_get_id_str(c->node) :
+ tipc_own_id_string(c->net),
+ rc);
+
+ return rc;
+}
+
+/**
+ * tipc_crypto_key_attach - Attach a new AEAD key to TIPC crypto
+ * @c: TIPC crypto to which the new AEAD key is attached
+ * @aead: the new AEAD key pointer
+ * @pos: desired slot in the crypto key array, = 0 if any!
+ *
+ * Return: new key id in case of success, otherwise: -EBUSY
+ */
+static int tipc_crypto_key_attach(struct tipc_crypto *c,
+ struct tipc_aead *aead, u8 pos)
+{
+ u8 new_pending, new_passive, new_key;
+ struct tipc_key key;
+ int rc = -EBUSY;
+
+ spin_lock_bh(&c->lock);
+ key = c->key;
+ if (key.active && key.passive)
+ goto exit;
+ if (key.passive && !tipc_aead_users(c->aead[key.passive]))
+ goto exit;
+ if (key.pending) {
+ if (pos)
+ goto exit;
+ if (tipc_aead_users(c->aead[key.pending]) > 0)
+ goto exit;
+ /* Replace it */
+ new_pending = key.pending;
+ new_passive = key.passive;
+ new_key = new_pending;
+ } else {
+ if (pos) {
+ if (key.active && pos != key_next(key.active)) {
+ new_pending = key.pending;
+ new_passive = pos;
+ new_key = new_passive;
+ goto attach;
+ } else if (!key.active && !key.passive) {
+ new_pending = pos;
+ new_passive = key.passive;
+ new_key = new_pending;
+ goto attach;
+ }
+ }
+ new_pending = key_next(key.active ?: key.passive);
+ new_passive = key.passive;
+ new_key = new_pending;
+ }
+
+attach:
+ aead->crypto = c;
+ tipc_crypto_key_set_state(c, new_passive, key.active, new_pending);
+ tipc_aead_rcu_replace(c->aead[new_key], aead, &c->lock);
+
+ c->working = 1;
+ c->timer1 = jiffies;
+ c->timer2 = jiffies;
+ rc = new_key;
+
+exit:
+ spin_unlock_bh(&c->lock);
+ return rc;
+}
+
+void tipc_crypto_key_flush(struct tipc_crypto *c)
+{
+ int k;
+
+ spin_lock_bh(&c->lock);
+ c->working = 0;
+ tipc_crypto_key_set_state(c, 0, 0, 0);
+ for (k = KEY_MIN; k <= KEY_MAX; k++)
+ tipc_crypto_key_detach(c->aead[k], &c->lock);
+ atomic_set(&c->peer_rx_active, 0);
+ atomic64_set(&c->sndnxt, 0);
+ spin_unlock_bh(&c->lock);
+}
+
+/**
+ * tipc_crypto_key_try_align - Align RX keys if possible
+ * @rx: RX crypto handle
+ * @new_pending: new pending slot if aligned (= TX key from peer)
+ *
+ * Peer has used an unknown key slot, this only happens when peer has left and
+ * rejoned, or we are newcomer.
+ * That means, there must be no active key but a pending key at unaligned slot.
+ * If so, we try to move the pending key to the new slot.
+ * Note: A potential passive key can exist, it will be shifted correspondingly!
+ *
+ * Return: "true" if key is successfully aligned, otherwise "false"
+ */
+static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending)
+{
+ struct tipc_aead *tmp1, *tmp2 = NULL;
+ struct tipc_key key;
+ bool aligned = false;
+ u8 new_passive = 0;
+ int x;
+
+ spin_lock(&rx->lock);
+ key = rx->key;
+ if (key.pending == new_pending) {
+ aligned = true;
+ goto exit;
+ }
+ if (key.active)
+ goto exit;
+ if (!key.pending)
+ goto exit;
+ if (tipc_aead_users(rx->aead[key.pending]) > 0)
+ goto exit;
+
+ /* Try to "isolate" this pending key first */
+ tmp1 = tipc_aead_rcu_ptr(rx->aead[key.pending], &rx->lock);
+ if (!refcount_dec_if_one(&tmp1->refcnt))
+ goto exit;
+ rcu_assign_pointer(rx->aead[key.pending], NULL);
+
+ /* Move passive key if any */
+ if (key.passive) {
+ tipc_aead_rcu_swap(rx->aead[key.passive], tmp2, &rx->lock);
+ x = (key.passive - key.pending + new_pending) % KEY_MAX;
+ new_passive = (x <= 0) ? x + KEY_MAX : x;
+ }
+
+ /* Re-allocate the key(s) */
+ tipc_crypto_key_set_state(rx, new_passive, 0, new_pending);
+ rcu_assign_pointer(rx->aead[new_pending], tmp1);
+ if (new_passive)
+ rcu_assign_pointer(rx->aead[new_passive], tmp2);
+ refcount_set(&tmp1->refcnt, 1);
+ aligned = true;
+ pr_info("RX(%s): key is aligned!\n", tipc_node_get_id_str(rx->node));
+
+exit:
+ spin_unlock(&rx->lock);
+ return aligned;
+}
+
+/**
+ * tipc_crypto_key_pick_tx - Pick one TX key for message decryption
+ * @tx: TX crypto handle
+ * @rx: RX crypto handle (can be NULL)
+ * @skb: the message skb which will be decrypted later
+ *
+ * This function looks up the existing TX keys and pick one which is suitable
+ * for the message decryption, that must be a cluster key and not used before
+ * on the same message (i.e. recursive).
+ *
+ * Return: the TX AEAD key handle in case of success, otherwise NULL
+ */
+static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
+ struct tipc_crypto *rx,
+ struct sk_buff *skb)
+{
+ struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(skb);
+ struct tipc_aead *aead = NULL;
+ struct tipc_key key = tx->key;
+ u8 k, i = 0;
+
+ /* Initialize data if not yet */
+ if (!skb_cb->tx_clone_deferred) {
+ skb_cb->tx_clone_deferred = 1;
+ memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx));
+ }
+
+ skb_cb->tx_clone_ctx.rx = rx;
+ if (++skb_cb->tx_clone_ctx.recurs > 2)
+ return NULL;
+
+ /* Pick one TX key */
+ spin_lock(&tx->lock);
+ do {
+ k = (i == 0) ? key.pending :
+ ((i == 1) ? key.active : key.passive);
+ if (!k)
+ continue;
+ aead = tipc_aead_rcu_ptr(tx->aead[k], &tx->lock);
+ if (!aead)
+ continue;
+ if (aead->mode != CLUSTER_KEY ||
+ aead == skb_cb->tx_clone_ctx.last) {
+ aead = NULL;
+ continue;
+ }
+ /* Ok, found one cluster key */
+ skb_cb->tx_clone_ctx.last = aead;
+ WARN_ON(skb->next);
+ skb->next = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!skb->next))
+ pr_warn("Failed to clone skb for next round if any\n");
+ WARN_ON(!refcount_inc_not_zero(&aead->refcnt));
+ break;
+ } while (++i < 3);
+ spin_unlock(&tx->lock);
+
+ return aead;
+}
+
+/**
+ * tipc_crypto_key_synch: Synch own key data according to peer key status
+ * @rx: RX crypto handle
+ * @new_rx_active: latest RX active key from peer
+ * @hdr: TIPCv2 message
+ *
+ * This function updates the peer node related data as the peer RX active key
+ * has changed, so the number of TX keys' users on this node are increased and
+ * decreased correspondingly.
+ *
+ * The "per-peer" sndnxt is also reset when the peer key has switched.
+ */
+static void tipc_crypto_key_synch(struct tipc_crypto *rx, u8 new_rx_active,
+ struct tipc_msg *hdr)
+{
+ struct net *net = rx->net;
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+ u8 cur_rx_active;
+
+ /* TX might be even not ready yet */
+ if (unlikely(!tx->key.active && !tx->key.pending))
+ return;
+
+ cur_rx_active = atomic_read(&rx->peer_rx_active);
+ if (likely(cur_rx_active == new_rx_active))
+ return;
+
+ /* Make sure this message destined for this node */
+ if (unlikely(msg_short(hdr) ||
+ msg_destnode(hdr) != tipc_own_addr(net)))
+ return;
+
+ /* Peer RX active key has changed, try to update owns' & TX users */
+ if (atomic_cmpxchg(&rx->peer_rx_active,
+ cur_rx_active,
+ new_rx_active) == cur_rx_active) {
+ if (new_rx_active)
+ tipc_aead_users_inc(tx->aead[new_rx_active], INT_MAX);
+ if (cur_rx_active)
+ tipc_aead_users_dec(tx->aead[cur_rx_active], 0);
+
+ atomic64_set(&rx->sndnxt, 0);
+ /* Mark the point TX key users changed */
+ tx->timer1 = jiffies;
+
+#ifdef TIPC_CRYPTO_DEBUG
+ pr_info("TX(%s): key users changed %d-- %d++, peer RX(%s)\n",
+ tipc_own_id_string(net), cur_rx_active,
+ new_rx_active, tipc_node_get_id_str(rx->node));
+#endif
+ }
+}
+
+static int tipc_crypto_key_revoke(struct net *net, u8 tx_key)
+{
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+ struct tipc_key key;
+
+ spin_lock(&tx->lock);
+ key = tx->key;
+ WARN_ON(!key.active || tx_key != key.active);
+
+ /* Free the active key */
+ tipc_crypto_key_set_state(tx, key.passive, 0, key.pending);
+ tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
+ spin_unlock(&tx->lock);
+
+ pr_warn("TX(%s): key is revoked!\n", tipc_own_id_string(net));
+ return -EKEYREVOKED;
+}
+
+int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
+ struct tipc_node *node)
+{
+ struct tipc_crypto *c;
+
+ if (*crypto)
+ return -EEXIST;
+
+ /* Allocate crypto */
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
+ if (!c)
+ return -ENOMEM;
+
+ /* Allocate statistic structure */
+ c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC);
+ if (!c->stats) {
+ kzfree(c);
+ return -ENOMEM;
+ }
+
+ c->working = 0;
+ c->net = net;
+ c->node = node;
+ tipc_crypto_key_set_state(c, 0, 0, 0);
+ atomic_set(&c->peer_rx_active, 0);
+ atomic64_set(&c->sndnxt, 0);
+ c->timer1 = jiffies;
+ c->timer2 = jiffies;
+ spin_lock_init(&c->lock);
+ *crypto = c;
+
+ return 0;
+}
+
+void tipc_crypto_stop(struct tipc_crypto **crypto)
+{
+ struct tipc_crypto *c, *tx, *rx;
+ bool is_rx;
+ u8 k;
+
+ if (!*crypto)
+ return;
+
+ rcu_read_lock();
+ /* RX stopping? => decrease TX key users if any */
+ is_rx = !!((*crypto)->node);
+ if (is_rx) {
+ rx = *crypto;
+ tx = tipc_net(rx->net)->crypto_tx;
+ k = atomic_read(&rx->peer_rx_active);
+ if (k) {
+ tipc_aead_users_dec(tx->aead[k], 0);
+ /* Mark the point TX key users changed */
+ tx->timer1 = jiffies;
+ }
+ }
+
+ /* Release AEAD keys */
+ c = *crypto;
+ for (k = KEY_MIN; k <= KEY_MAX; k++)
+ tipc_aead_put(rcu_dereference(c->aead[k]));
+ rcu_read_unlock();
+
+ pr_warn("%s(%s) has been purged, node left!\n",
+ (is_rx) ? "RX" : "TX",
+ (is_rx) ? tipc_node_get_id_str((*crypto)->node) :
+ tipc_own_id_string((*crypto)->net));
+
+ /* Free this crypto statistics */
+ free_percpu(c->stats);
+
+ *crypto = NULL;
+ kzfree(c);
+}
+
+void tipc_crypto_timeout(struct tipc_crypto *rx)
+{
+ struct tipc_net *tn = tipc_net(rx->net);
+ struct tipc_crypto *tx = tn->crypto_tx;
+ struct tipc_key key;
+ u8 new_pending, new_passive;
+ int cmd;
+
+ /* TX key activating:
+ * The pending key (users > 0) -> active
+ * The active key if any (users == 0) -> free
+ */
+ spin_lock(&tx->lock);
+ key = tx->key;
+ if (key.active && tipc_aead_users(tx->aead[key.active]) > 0)
+ goto s1;
+ if (!key.pending || tipc_aead_users(tx->aead[key.pending]) <= 0)
+ goto s1;
+ if (time_before(jiffies, tx->timer1 + TIPC_TX_LASTING_LIM))
+ goto s1;
+
+ tipc_crypto_key_set_state(tx, key.passive, key.pending, 0);
+ if (key.active)
+ tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
+ this_cpu_inc(tx->stats->stat[STAT_SWITCHES]);
+ pr_info("TX(%s): key %d is activated!\n", tipc_own_id_string(tx->net),
+ key.pending);
+
+s1:
+ spin_unlock(&tx->lock);
+
+ /* RX key activating:
+ * The pending key (users > 0) -> active
+ * The active key if any -> passive, freed later
+ */
+ spin_lock(&rx->lock);
+ key = rx->key;
+ if (!key.pending || tipc_aead_users(rx->aead[key.pending]) <= 0)
+ goto s2;
+
+ new_pending = (key.passive &&
+ !tipc_aead_users(rx->aead[key.passive])) ?
+ key.passive : 0;
+ new_passive = (key.active) ?: ((new_pending) ? 0 : key.passive);
+ tipc_crypto_key_set_state(rx, new_passive, key.pending, new_pending);
+ this_cpu_inc(rx->stats->stat[STAT_SWITCHES]);
+ pr_info("RX(%s): key %d is activated!\n",
+ tipc_node_get_id_str(rx->node), key.pending);
+ goto s5;
+
+s2:
+ /* RX key "faulty" switching:
+ * The faulty pending key (users < -30) -> passive
+ * The passive key (users = 0) -> pending
+ * Note: This only happens after RX deactivated - s3!
+ */
+ key = rx->key;
+ if (!key.pending || tipc_aead_users(rx->aead[key.pending]) > -30)
+ goto s3;
+ if (!key.passive || tipc_aead_users(rx->aead[key.passive]) != 0)
+ goto s3;
+
+ new_pending = key.passive;
+ new_passive = key.pending;
+ tipc_crypto_key_set_state(rx, new_passive, key.active, new_pending);
+ goto s5;
+
+s3:
+ /* RX key deactivating:
+ * The passive key if any -> pending
+ * The active key -> passive (users = 0) / pending
+ * The pending key if any -> passive (users = 0)
+ */
+ key = rx->key;
+ if (!key.active)
+ goto s4;
+ if (time_before(jiffies, rx->timer1 + TIPC_RX_ACTIVE_LIM))
+ goto s4;
+
+ new_pending = (key.passive) ?: key.active;
+ new_passive = (key.passive) ? key.active : key.pending;
+ tipc_aead_users_set(rx->aead[new_pending], 0);
+ if (new_passive)
+ tipc_aead_users_set(rx->aead[new_passive], 0);
+ tipc_crypto_key_set_state(rx, new_passive, 0, new_pending);
+ pr_info("RX(%s): key %d is deactivated!\n",
+ tipc_node_get_id_str(rx->node), key.active);
+ goto s5;
+
+s4:
+ /* RX key passive -> freed: */
+ key = rx->key;
+ if (!key.passive || !tipc_aead_users(rx->aead[key.passive]))
+ goto s5;
+ if (time_before(jiffies, rx->timer2 + TIPC_RX_PASSIVE_LIM))
+ goto s5;
+
+ tipc_crypto_key_set_state(rx, 0, key.active, key.pending);
+ tipc_crypto_key_detach(rx->aead[key.passive], &rx->lock);
+ pr_info("RX(%s): key %d is freed!\n", tipc_node_get_id_str(rx->node),
+ key.passive);
+
+s5:
+ spin_unlock(&rx->lock);
+
+ /* Limit max_tfms & do debug commands if needed */
+ if (likely(sysctl_tipc_max_tfms <= TIPC_MAX_TFMS_LIM))
+ return;
+
+ cmd = sysctl_tipc_max_tfms;
+ sysctl_tipc_max_tfms = TIPC_MAX_TFMS_DEF;
+ tipc_crypto_do_cmd(rx->net, cmd);
+}
+
+/**
+ * tipc_crypto_xmit - Build & encrypt TIPC message for xmit
+ * @net: struct net
+ * @skb: input/output message skb pointer
+ * @b: bearer used for xmit later
+ * @dst: destination media address
+ * @__dnode: destination node for reference if any
+ *
+ * First, build an encryption message header on the top of the message, then
+ * encrypt the original TIPC message by using the active or pending TX key.
+ * If the encryption is successful, the encrypted skb is returned directly or
+ * via the callback.
+ * Otherwise, the skb is freed!
+ *
+ * Return:
+ * 0 : the encryption has succeeded (or no encryption)
+ * -EINPROGRESS/-EBUSY : the encryption is ongoing, a callback will be made
+ * -ENOKEK : the encryption has failed due to no key
+ * -EKEYREVOKED : the encryption has failed due to key revoked
+ * -ENOMEM : the encryption has failed due to no memory
+ * < 0 : the encryption has failed due to other reasons
+ */
+int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
+ struct tipc_bearer *b, struct tipc_media_addr *dst,
+ struct tipc_node *__dnode)
+{
+ struct tipc_crypto *__rx = tipc_node_crypto_rx(__dnode);
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+ struct tipc_crypto_stats __percpu *stats = tx->stats;
+ struct tipc_key key = tx->key;
+ struct tipc_aead *aead = NULL;
+ struct sk_buff *probe;
+ int rc = -ENOKEY;
+ u8 tx_key;
+
+ /* No encryption? */
+ if (!tx->working)
+ return 0;
+
+ /* Try with the pending key if available and:
+ * 1) This is the only choice (i.e. no active key) or;
+ * 2) Peer has switched to this key (unicast only) or;
+ * 3) It is time to do a pending key probe;
+ */
+ if (unlikely(key.pending)) {
+ tx_key = key.pending;
+ if (!key.active)
+ goto encrypt;
+ if (__rx && atomic_read(&__rx->peer_rx_active) == tx_key)
+ goto encrypt;
+ if (TIPC_SKB_CB(*skb)->probe)
+ goto encrypt;
+ if (!__rx &&
+ time_after(jiffies, tx->timer2 + TIPC_TX_PROBE_LIM)) {
+ tx->timer2 = jiffies;
+ probe = skb_clone(*skb, GFP_ATOMIC);
+ if (probe) {
+ TIPC_SKB_CB(probe)->probe = 1;
+ tipc_crypto_xmit(net, &probe, b, dst, __dnode);
+ if (probe)
+ b->media->send_msg(net, probe, b, dst);
+ }
+ }
+ }
+ /* Else, use the active key if any */
+ if (likely(key.active)) {
+ tx_key = key.active;
+ goto encrypt;
+ }
+ goto exit;
+
+encrypt:
+ aead = tipc_aead_get(tx->aead[tx_key]);
+ if (unlikely(!aead))
+ goto exit;
+ rc = tipc_ehdr_build(net, aead, tx_key, *skb, __rx);
+ if (likely(rc > 0))
+ rc = tipc_aead_encrypt(aead, *skb, b, dst, __dnode);
+
+exit:
+ switch (rc) {
+ case 0:
+ this_cpu_inc(stats->stat[STAT_OK]);
+ break;
+ case -EINPROGRESS:
+ case -EBUSY:
+ this_cpu_inc(stats->stat[STAT_ASYNC]);
+ *skb = NULL;
+ return rc;
+ default:
+ this_cpu_inc(stats->stat[STAT_NOK]);
+ if (rc == -ENOKEY)
+ this_cpu_inc(stats->stat[STAT_NOKEYS]);
+ else if (rc == -EKEYREVOKED)
+ this_cpu_inc(stats->stat[STAT_BADKEYS]);
+ kfree_skb(*skb);
+ *skb = NULL;
+ break;
+ }
+
+ tipc_aead_put(aead);
+ return rc;
+}
+
+/**
+ * tipc_crypto_rcv - Decrypt an encrypted TIPC message from peer
+ * @net: struct net
+ * @rx: RX crypto handle
+ * @skb: input/output message skb pointer
+ * @b: bearer where the message has been received
+ *
+ * If the decryption is successful, the decrypted skb is returned directly or
+ * as the callback, the encryption header and auth tag will be trimed out
+ * before forwarding to tipc_rcv() via the tipc_crypto_rcv_complete().
+ * Otherwise, the skb will be freed!
+ * Note: RX key(s) can be re-aligned, or in case of no key suitable, TX
+ * cluster key(s) can be taken for decryption (- recursive).
+ *
+ * Return:
+ * 0 : the decryption has successfully completed
+ * -EINPROGRESS/-EBUSY : the decryption is ongoing, a callback will be made
+ * -ENOKEY : the decryption has failed due to no key
+ * -EBADMSG : the decryption has failed due to bad message
+ * -ENOMEM : the decryption has failed due to no memory
+ * < 0 : the decryption has failed due to other reasons
+ */
+int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
+ struct sk_buff **skb, struct tipc_bearer *b)
+{
+ struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+ struct tipc_crypto_stats __percpu *stats;
+ struct tipc_aead *aead = NULL;
+ struct tipc_key key;
+ int rc = -ENOKEY;
+ u8 tx_key = 0;
+
+ /* New peer?
+ * Let's try with TX key (i.e. cluster mode) & verify the skb first!
+ */
+ if (unlikely(!rx))
+ goto pick_tx;
+
+ /* Pick RX key according to TX key, three cases are possible:
+ * 1) The current active key (likely) or;
+ * 2) The pending (new or deactivated) key (if any) or;
+ * 3) The passive or old active key (i.e. users > 0);
+ */
+ tx_key = ((struct tipc_ehdr *)(*skb)->data)->tx_key;
+ key = rx->key;
+ if (likely(tx_key == key.active))
+ goto decrypt;
+ if (tx_key == key.pending)
+ goto decrypt;
+ if (tx_key == key.passive) {
+ rx->timer2 = jiffies;
+ if (tipc_aead_users(rx->aead[key.passive]) > 0)
+ goto decrypt;
+ }
+
+ /* Unknown key, let's try to align RX key(s) */
+ if (tipc_crypto_key_try_align(rx, tx_key))
+ goto decrypt;
+
+pick_tx:
+ /* No key suitable? Try to pick one from TX... */
+ aead = tipc_crypto_key_pick_tx(tx, rx, *skb);
+ if (aead)
+ goto decrypt;
+ goto exit;
+
+decrypt:
+ rcu_read_lock();
+ if (!aead)
+ aead = tipc_aead_get(rx->aead[tx_key]);
+ rc = tipc_aead_decrypt(net, aead, *skb, b);
+ rcu_read_unlock();
+
+exit:
+ stats = ((rx) ?: tx)->stats;
+ switch (rc) {
+ case 0:
+ this_cpu_inc(stats->stat[STAT_OK]);
+ break;
+ case -EINPROGRESS:
+ case -EBUSY:
+ this_cpu_inc(stats->stat[STAT_ASYNC]);
+ *skb = NULL;
+ return rc;
+ default:
+ this_cpu_inc(stats->stat[STAT_NOK]);
+ if (rc == -ENOKEY) {
+ kfree_skb(*skb);
+ *skb = NULL;
+ if (rx)
+ tipc_node_put(rx->node);
+ this_cpu_inc(stats->stat[STAT_NOKEYS]);
+ return rc;
+ } else if (rc == -EBADMSG) {
+ this_cpu_inc(stats->stat[STAT_BADMSGS]);
+ }
+ break;
+ }
+
+ tipc_crypto_rcv_complete(net, aead, b, skb, rc);
+ return rc;
+}
+
+static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
+ struct tipc_bearer *b,
+ struct sk_buff **skb, int err)
+{
+ struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(*skb);
+ struct tipc_crypto *rx = aead->crypto;
+ struct tipc_aead *tmp = NULL;
+ struct tipc_ehdr *ehdr;
+ struct tipc_node *n;
+ u8 rx_key_active;
+ bool destined;
+
+ /* Is this completed by TX? */
+ if (unlikely(!rx->node)) {
+ rx = skb_cb->tx_clone_ctx.rx;
+#ifdef TIPC_CRYPTO_DEBUG
+ pr_info("TX->RX(%s): err %d, aead %p, skb->next %p, flags %x\n",
+ (rx) ? tipc_node_get_id_str(rx->node) : "-", err, aead,
+ (*skb)->next, skb_cb->flags);
+ pr_info("skb_cb [recurs %d, last %p], tx->aead [%p %p %p]\n",
+ skb_cb->tx_clone_ctx.recurs, skb_cb->tx_clone_ctx.last,
+ aead->crypto->aead[1], aead->crypto->aead[2],
+ aead->crypto->aead[3]);
+#endif
+ if (unlikely(err)) {
+ if (err == -EBADMSG && (*skb)->next)
+ tipc_rcv(net, (*skb)->next, b);
+ goto free_skb;
+ }
+
+ if (likely((*skb)->next)) {
+ kfree_skb((*skb)->next);
+ (*skb)->next = NULL;
+ }
+ ehdr = (struct tipc_ehdr *)(*skb)->data;
+ if (!rx) {
+ WARN_ON(ehdr->user != LINK_CONFIG);
+ n = tipc_node_create(net, 0, ehdr->id, 0xffffu, 0,
+ true);
+ rx = tipc_node_crypto_rx(n);
+ if (unlikely(!rx))
+ goto free_skb;
+ }
+
+ /* Skip cloning this time as we had a RX pending key */
+ if (rx->key.pending)
+ goto rcv;
+ if (tipc_aead_clone(&tmp, aead) < 0)
+ goto rcv;
+ if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key) < 0) {
+ tipc_aead_free(&tmp->rcu);
+ goto rcv;
+ }
+ tipc_aead_put(aead);
+ aead = tipc_aead_get(tmp);
+ }
+
+ if (unlikely(err)) {
+ tipc_aead_users_dec(aead, INT_MIN);
+ goto free_skb;
+ }
+
+ /* Set the RX key's user */
+ tipc_aead_users_set(aead, 1);
+
+rcv:
+ /* Mark this point, RX works */
+ rx->timer1 = jiffies;
+
+ /* Remove ehdr & auth. tag prior to tipc_rcv() */
+ ehdr = (struct tipc_ehdr *)(*skb)->data;
+ destined = ehdr->destined;
+ rx_key_active = ehdr->rx_key_active;
+ skb_pull(*skb, tipc_ehdr_size(ehdr));
+ pskb_trim(*skb, (*skb)->len - aead->authsize);
+
+ /* Validate TIPCv2 message */
+ if (unlikely(!tipc_msg_validate(skb))) {
+ pr_err_ratelimited("Packet dropped after decryption!\n");
+ goto free_skb;
+ }
+
+ /* Update peer RX active key & TX users */
+ if (destined)
+ tipc_crypto_key_synch(rx, rx_key_active, buf_msg(*skb));
+
+ /* Mark skb decrypted */
+ skb_cb->decrypted = 1;
+
+ /* Clear clone cxt if any */
+ if (likely(!skb_cb->tx_clone_deferred))
+ goto exit;
+ skb_cb->tx_clone_deferred = 0;
+ memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx));
+ goto exit;
+
+free_skb:
+ kfree_skb(*skb);
+ *skb = NULL;
+
+exit:
+ tipc_aead_put(aead);
+ if (rx)
+ tipc_node_put(rx->node);
+}
+
+static void tipc_crypto_do_cmd(struct net *net, int cmd)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_crypto *tx = tn->crypto_tx, *rx;
+ struct list_head *p;
+ unsigned int stat;
+ int i, j, cpu;
+ char buf[200];
+
+ /* Currently only one command is supported */
+ switch (cmd) {
+ case 0xfff1:
+ goto print_stats;
+ default:
+ return;
+ }
+
+print_stats:
+ /* Print a header */
+ pr_info("\n=============== TIPC Crypto Statistics ===============\n\n");
+
+ /* Print key status */
+ pr_info("Key status:\n");
+ pr_info("TX(%7.7s)\n%s", tipc_own_id_string(net),
+ tipc_crypto_key_dump(tx, buf));
+
+ rcu_read_lock();
+ for (p = tn->node_list.next; p != &tn->node_list; p = p->next) {
+ rx = tipc_node_crypto_rx_by_list(p);
+ pr_info("RX(%7.7s)\n%s", tipc_node_get_id_str(rx->node),
+ tipc_crypto_key_dump(rx, buf));
+ }
+ rcu_read_unlock();
+
+ /* Print crypto statistics */
+ for (i = 0, j = 0; i < MAX_STATS; i++)
+ j += scnprintf(buf + j, 200 - j, "|%11s ", hstats[i]);
+ pr_info("\nCounter %s", buf);
+
+ memset(buf, '-', 115);
+ buf[115] = '\0';
+ pr_info("%s\n", buf);
+
+ j = scnprintf(buf, 200, "TX(%7.7s) ", tipc_own_id_string(net));
+ for_each_possible_cpu(cpu) {
+ for (i = 0; i < MAX_STATS; i++) {
+ stat = per_cpu_ptr(tx->stats, cpu)->stat[i];
+ j += scnprintf(buf + j, 200 - j, "|%11d ", stat);
+ }
+ pr_info("%s", buf);
+ j = scnprintf(buf, 200, "%12s", " ");
+ }
+
+ rcu_read_lock();
+ for (p = tn->node_list.next; p != &tn->node_list; p = p->next) {
+ rx = tipc_node_crypto_rx_by_list(p);
+ j = scnprintf(buf, 200, "RX(%7.7s) ",
+ tipc_node_get_id_str(rx->node));
+ for_each_possible_cpu(cpu) {
+ for (i = 0; i < MAX_STATS; i++) {
+ stat = per_cpu_ptr(rx->stats, cpu)->stat[i];
+ j += scnprintf(buf + j, 200 - j, "|%11d ",
+ stat);
+ }
+ pr_info("%s", buf);
+ j = scnprintf(buf, 200, "%12s", " ");
+ }
+ }
+ rcu_read_unlock();
+
+ pr_info("\n======================== Done ========================\n");
+}
+
+static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf)
+{
+ struct tipc_key key = c->key;
+ struct tipc_aead *aead;
+ int k, i = 0;
+ char *s;
+
+ for (k = KEY_MIN; k <= KEY_MAX; k++) {
+ if (k == key.passive)
+ s = "PAS";
+ else if (k == key.active)
+ s = "ACT";
+ else if (k == key.pending)
+ s = "PEN";
+ else
+ s = "-";
+ i += scnprintf(buf + i, 200 - i, "\tKey%d: %s", k, s);
+
+ rcu_read_lock();
+ aead = rcu_dereference(c->aead[k]);
+ if (aead)
+ i += scnprintf(buf + i, 200 - i,
+ "{\"%s...\", \"%s\"}/%d:%d",
+ aead->hint,
+ (aead->mode == CLUSTER_KEY) ? "c" : "p",
+ atomic_read(&aead->users),
+ refcount_read(&aead->refcnt));
+ rcu_read_unlock();
+ i += scnprintf(buf + i, 200 - i, "\n");
+ }
+
+ if (c->node)
+ i += scnprintf(buf + i, 200 - i, "\tPeer RX active: %d\n",
+ atomic_read(&c->peer_rx_active));
+
+ return buf;
+}
+
+#ifdef TIPC_CRYPTO_DEBUG
+static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new,
+ char *buf)
+{
+ struct tipc_key *key = &old;
+ int k, i = 0;
+ char *s;
+
+ /* Output format: "[%s %s %s] -> [%s %s %s]", max len = 32 */
+again:
+ i += scnprintf(buf + i, 32 - i, "[");
+ for (k = KEY_MIN; k <= KEY_MAX; k++) {
+ if (k == key->passive)
+ s = "pas";
+ else if (k == key->active)
+ s = "act";
+ else if (k == key->pending)
+ s = "pen";
+ else
+ s = "-";
+ i += scnprintf(buf + i, 32 - i,
+ (k != KEY_MAX) ? "%s " : "%s", s);
+ }
+ if (key != &new) {
+ i += scnprintf(buf + i, 32 - i, "] -> ");
+ key = &new;
+ goto again;
+ }
+ i += scnprintf(buf + i, 32 - i, "]");
+ return buf;
+}
+#endif
diff --git a/net/tipc/crypto.h b/net/tipc/crypto.h
new file mode 100644
index 000000000000..c3de769f49e8
--- /dev/null
+++ b/net/tipc/crypto.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/**
+ * net/tipc/crypto.h: Include file for TIPC crypto
+ *
+ * Copyright (c) 2019, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef CONFIG_TIPC_CRYPTO
+#ifndef _TIPC_CRYPTO_H
+#define _TIPC_CRYPTO_H
+
+#include "core.h"
+#include "node.h"
+#include "msg.h"
+#include "bearer.h"
+
+#define TIPC_EVERSION 7
+
+/* AEAD aes(gcm) */
+#define TIPC_AES_GCM_KEY_SIZE_128 16
+#define TIPC_AES_GCM_KEY_SIZE_192 24
+#define TIPC_AES_GCM_KEY_SIZE_256 32
+
+#define TIPC_AES_GCM_SALT_SIZE 4
+#define TIPC_AES_GCM_IV_SIZE 12
+#define TIPC_AES_GCM_TAG_SIZE 16
+
+/**
+ * TIPC crypto modes:
+ * - CLUSTER_KEY:
+ * One single key is used for both TX & RX in all nodes in the cluster.
+ * - PER_NODE_KEY:
+ * Each nodes in the cluster has one TX key, for RX a node needs to know
+ * its peers' TX key for the decryption of messages from those nodes.
+ */
+enum {
+ CLUSTER_KEY = 1,
+ PER_NODE_KEY = (1 << 1),
+};
+
+extern int sysctl_tipc_max_tfms __read_mostly;
+
+/**
+ * TIPC encryption message format:
+ *
+ * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
+ * 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w0:|Ver=7| User |D|TX |RX |K| Rsvd |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w1:| Seqno |
+ * w2:| (8 octets) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w3:\ Prevnode \
+ * / (4 or 16 octets) /
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \ \
+ * / Encrypted complete TIPC V2 header and user data /
+ * \ \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | AuthTag |
+ * | (16 octets) |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Word0:
+ * Ver : = 7 i.e. TIPC encryption message version
+ * User : = 7 (for LINK_PROTOCOL); = 13 (for LINK_CONFIG) or = 0
+ * D : The destined bit i.e. the message's destination node is
+ * "known" or not at the message encryption
+ * TX : TX key used for the message encryption
+ * RX : Currently RX active key corresponding to the destination
+ * node's TX key (when the "D" bit is set)
+ * K : Keep-alive bit (for RPS, LINK_PROTOCOL/STATE_MSG only)
+ * Rsvd : Reserved bit, field
+ * Word1-2:
+ * Seqno : The 64-bit sequence number of the encrypted message, also
+ * part of the nonce used for the message encryption/decryption
+ * Word3-:
+ * Prevnode: The source node address, or ID in case LINK_CONFIG only
+ * AuthTag : The authentication tag for the message integrity checking
+ * generated by the message encryption
+ */
+struct tipc_ehdr {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 destined:1,
+ user:4,
+ version:3;
+ __u8 reserved_1:3,
+ keepalive:1,
+ rx_key_active:2,
+ tx_key:2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 version:3,
+ user:4,
+ destined:1;
+ __u8 tx_key:2,
+ rx_key_active:2,
+ keepalive:1,
+ reserved_1:3;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __be16 reserved_2;
+ } __packed;
+ __be32 w0;
+ };
+ __be64 seqno;
+ union {
+ __be32 addr;
+ __u8 id[NODE_ID_LEN]; /* For a LINK_CONFIG message only! */
+ };
+#define EHDR_SIZE (offsetof(struct tipc_ehdr, addr) + sizeof(__be32))
+#define EHDR_CFG_SIZE (sizeof(struct tipc_ehdr))
+#define EHDR_MIN_SIZE (EHDR_SIZE)
+#define EHDR_MAX_SIZE (EHDR_CFG_SIZE)
+#define EMSG_OVERHEAD (EHDR_SIZE + TIPC_AES_GCM_TAG_SIZE)
+} __packed;
+
+int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
+ struct tipc_node *node);
+void tipc_crypto_stop(struct tipc_crypto **crypto);
+void tipc_crypto_timeout(struct tipc_crypto *rx);
+int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
+ struct tipc_bearer *b, struct tipc_media_addr *dst,
+ struct tipc_node *__dnode);
+int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
+ struct sk_buff **skb, struct tipc_bearer *b);
+int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
+ u8 mode);
+void tipc_crypto_key_flush(struct tipc_crypto *c);
+int tipc_aead_key_validate(struct tipc_aead_key *ukey);
+bool tipc_ehdr_validate(struct sk_buff *skb);
+
+#endif /* _TIPC_CRYPTO_H */
+#endif
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index c138d68e8a69..b043e8c6397a 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -94,6 +94,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
msg_set_dest_domain(hdr, dest_domain);
msg_set_bc_netid(hdr, tn->net_id);
b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+ msg_set_peer_net_hash(hdr, tipc_net_hash_mixes(net, tn->random));
msg_set_node_id(hdr, tipc_own_id(net));
}
@@ -242,7 +243,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
if (!tipc_in_scope(legacy, b->domain, src))
return;
tipc_node_check_dest(net, src, peer_id, b, caps, signature,
- &maddr, &respond, &dupl_addr);
+ msg_peer_net_hash(hdr), &maddr, &respond,
+ &dupl_addr);
if (dupl_addr)
disc_dupl_alert(b, src, &maddr);
if (!respond)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 999eab592de8..fb72031228c9 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -44,6 +44,7 @@
#include "netlink.h"
#include "monitor.h"
#include "trace.h"
+#include "crypto.h"
#include <linux/pkt_sched.h>
@@ -397,6 +398,15 @@ int tipc_link_mtu(struct tipc_link *l)
return l->mtu;
}
+int tipc_link_mss(struct tipc_link *l)
+{
+#ifdef CONFIG_TIPC_CRYPTO
+ return l->mtu - INT_H_SIZE - EMSG_OVERHEAD;
+#else
+ return l->mtu - INT_H_SIZE;
+#endif
+}
+
u16 tipc_link_rcv_nxt(struct tipc_link *l)
{
return l->rcv_nxt;
@@ -940,16 +950,18 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
struct sk_buff_head *xmitq)
{
struct tipc_msg *hdr = buf_msg(skb_peek(list));
- unsigned int maxwin = l->window;
- int imp = msg_importance(hdr);
- unsigned int mtu = l->mtu;
+ struct sk_buff_head *backlogq = &l->backlogq;
+ struct sk_buff_head *transmq = &l->transmq;
+ struct sk_buff *skb, *_skb;
+ u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
u16 ack = l->rcv_nxt - 1;
u16 seqno = l->snd_nxt;
- u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
- struct sk_buff_head *transmq = &l->transmq;
- struct sk_buff_head *backlogq = &l->backlogq;
- struct sk_buff *skb, *_skb, **tskb;
int pkt_cnt = skb_queue_len(list);
+ int imp = msg_importance(hdr);
+ unsigned int mss = tipc_link_mss(l);
+ unsigned int maxwin = l->window;
+ unsigned int mtu = l->mtu;
+ bool new_bundle;
int rc = 0;
if (unlikely(msg_size(hdr) > mtu)) {
@@ -975,20 +987,18 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
}
/* Prepare each packet for sending, and add to relevant queue: */
- while (skb_queue_len(list)) {
- skb = skb_peek(list);
- hdr = buf_msg(skb);
- msg_set_seqno(hdr, seqno);
- msg_set_ack(hdr, ack);
- msg_set_bcast_ack(hdr, bc_ack);
-
+ while ((skb = __skb_dequeue(list))) {
if (likely(skb_queue_len(transmq) < maxwin)) {
+ hdr = buf_msg(skb);
+ msg_set_seqno(hdr, seqno);
+ msg_set_ack(hdr, ack);
+ msg_set_bcast_ack(hdr, bc_ack);
_skb = skb_clone(skb, GFP_ATOMIC);
if (!_skb) {
+ kfree_skb(skb);
__skb_queue_purge(list);
return -ENOBUFS;
}
- __skb_dequeue(list);
__skb_queue_tail(transmq, skb);
/* next retransmit attempt */
if (link_is_bc_sndlink(l))
@@ -1000,22 +1010,25 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
seqno++;
continue;
}
- tskb = &l->backlog[imp].target_bskb;
- if (tipc_msg_bundle(*tskb, hdr, mtu)) {
- kfree_skb(__skb_dequeue(list));
- l->stats.sent_bundled++;
- continue;
- }
- if (tipc_msg_make_bundle(tskb, hdr, mtu, l->addr)) {
- kfree_skb(__skb_dequeue(list));
- __skb_queue_tail(backlogq, *tskb);
- l->backlog[imp].len++;
- l->stats.sent_bundled++;
- l->stats.sent_bundles++;
+ if (tipc_msg_try_bundle(l->backlog[imp].target_bskb, &skb,
+ mss, l->addr, &new_bundle)) {
+ if (skb) {
+ /* Keep a ref. to the skb for next try */
+ l->backlog[imp].target_bskb = skb;
+ l->backlog[imp].len++;
+ __skb_queue_tail(backlogq, skb);
+ } else {
+ if (new_bundle) {
+ l->stats.sent_bundles++;
+ l->stats.sent_bundled++;
+ }
+ l->stats.sent_bundled++;
+ }
continue;
}
l->backlog[imp].target_bskb = NULL;
- l->backlog[imp].len += skb_queue_len(list);
+ l->backlog[imp].len += (1 + skb_queue_len(list));
+ __skb_queue_tail(backlogq, skb);
skb_queue_splice_tail_init(list, backlogq);
}
l->snd_nxt = seqno;
@@ -1084,7 +1097,7 @@ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
return false;
if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
- msecs_to_jiffies(r->tolerance)))
+ msecs_to_jiffies(r->tolerance * 10)))
return false;
hdr = buf_msg(skb);
@@ -1151,7 +1164,7 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
continue;
TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
- _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC);
+ _skb = pskb_copy(skb, GFP_ATOMIC);
if (!_skb)
return 0;
hdr = buf_msg(_skb);
@@ -1427,8 +1440,7 @@ next_gap_ack:
if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
continue;
TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
- _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE,
- GFP_ATOMIC);
+ _skb = pskb_copy(skb, GFP_ATOMIC);
if (!_skb)
continue;
hdr = buf_msg(_skb);
@@ -1728,21 +1740,6 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
return;
__skb_queue_head_init(&tnlq);
- __skb_queue_head_init(&tmpxq);
- __skb_queue_head_init(&frags);
-
- /* At least one packet required for safe algorithm => add dummy */
- skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
- BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net),
- 0, 0, TIPC_ERR_NO_PORT);
- if (!skb) {
- pr_warn("%sunable to create tunnel packet\n", link_co_err);
- return;
- }
- __skb_queue_tail(&tnlq, skb);
- tipc_link_xmit(l, &tnlq, &tmpxq);
- __skb_queue_purge(&tmpxq);
-
/* Link Synching:
* From now on, send only one single ("dummy") SYNCH message
* to peer. The SYNCH message does not contain any data, just
@@ -1768,6 +1765,20 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
return;
}
+ __skb_queue_head_init(&tmpxq);
+ __skb_queue_head_init(&frags);
+ /* At least one packet required for safe algorithm => add dummy */
+ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
+ BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net),
+ 0, 0, TIPC_ERR_NO_PORT);
+ if (!skb) {
+ pr_warn("%sunable to create tunnel packet\n", link_co_err);
+ return;
+ }
+ __skb_queue_tail(&tnlq, skb);
+ tipc_link_xmit(l, &tnlq, &tmpxq);
+ __skb_queue_purge(&tmpxq);
+
/* Initialize reusable tunnel packet header */
tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
mtyp, INT_H_SIZE, l->addr);
@@ -1873,7 +1884,7 @@ void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
tipc_link_create_dummy_tnl_msg(tnl, xmitq);
- /* This failover link enpoint was never established before,
+ /* This failover link endpoint was never established before,
* so it has not received anything from peer.
* Otherwise, it must be a normal failover situation or the
* node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
diff --git a/net/tipc/link.h b/net/tipc/link.h
index adcad65e761c..c09e9d49d0a3 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -141,6 +141,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
int tipc_link_bc_peers(struct tipc_link *l);
void tipc_link_set_mtu(struct tipc_link *l, int mtu);
int tipc_link_mtu(struct tipc_link *l);
+int tipc_link_mss(struct tipc_link *l);
void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
struct sk_buff_head *xmitq);
void tipc_link_build_bc_sync_msg(struct tipc_link *l,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 922d262e153f..0d515d20b056 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -39,10 +39,16 @@
#include "msg.h"
#include "addr.h"
#include "name_table.h"
+#include "crypto.h"
#define MAX_FORWARD_SIZE 1024
+#ifdef CONFIG_TIPC_CRYPTO
+#define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16)
+#define BUF_TAILROOM (TIPC_AES_GCM_TAG_SIZE)
+#else
#define BUF_HEADROOM (LL_MAX_HEADER + 48)
#define BUF_TAILROOM 16
+#endif
static unsigned int align(unsigned int i)
{
@@ -61,7 +67,11 @@ static unsigned int align(unsigned int i)
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
{
struct sk_buff *skb;
+#ifdef CONFIG_TIPC_CRYPTO
+ unsigned int buf_size = (BUF_HEADROOM + size + BUF_TAILROOM + 3) & ~3u;
+#else
unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
+#endif
skb = alloc_skb_fclone(buf_size, gfp);
if (skb) {
@@ -173,7 +183,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
}
if (fragid == LAST_FRAGMENT) {
- TIPC_SKB_CB(head)->validated = false;
+ TIPC_SKB_CB(head)->validated = 0;
if (unlikely(!tipc_msg_validate(&head)))
goto err;
*buf = head;
@@ -190,6 +200,59 @@ err:
return 0;
}
+/**
+ * tipc_msg_append(): Append data to tail of an existing buffer queue
+ * @hdr: header to be used
+ * @m: the data to be appended
+ * @mss: max allowable size of buffer
+ * @dlen: size of data to be appended
+ * @txq: queue to appand to
+ * Returns the number og 1k blocks appended or errno value
+ */
+int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
+ int mss, struct sk_buff_head *txq)
+{
+ struct sk_buff *skb, *prev;
+ int accounted, total, curr;
+ int mlen, cpy, rem = dlen;
+ struct tipc_msg *hdr;
+
+ skb = skb_peek_tail(txq);
+ accounted = skb ? msg_blocks(buf_msg(skb)) : 0;
+ total = accounted;
+
+ while (rem) {
+ if (!skb || skb->len >= mss) {
+ prev = skb;
+ skb = tipc_buf_acquire(mss, GFP_KERNEL);
+ if (unlikely(!skb))
+ return -ENOMEM;
+ skb_orphan(skb);
+ skb_trim(skb, MIN_H_SIZE);
+ hdr = buf_msg(skb);
+ skb_copy_to_linear_data(skb, _hdr, MIN_H_SIZE);
+ msg_set_hdr_sz(hdr, MIN_H_SIZE);
+ msg_set_size(hdr, MIN_H_SIZE);
+ __skb_queue_tail(txq, skb);
+ total += 1;
+ if (prev)
+ msg_set_ack_required(buf_msg(prev), 0);
+ msg_set_ack_required(hdr, 1);
+ }
+ hdr = buf_msg(skb);
+ curr = msg_blocks(hdr);
+ mlen = msg_size(hdr);
+ cpy = min_t(int, rem, mss - mlen);
+ if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter))
+ return -EFAULT;
+ msg_set_size(hdr, mlen + cpy);
+ skb_put(skb, cpy);
+ rem -= cpy;
+ total += msg_blocks(hdr) - curr;
+ }
+ return total - accounted;
+}
+
/* tipc_msg_validate - validate basic format of received message
*
* This routine ensures a TIPC message has an acceptable header, and at least
@@ -218,6 +281,7 @@ bool tipc_msg_validate(struct sk_buff **_skb)
if (unlikely(TIPC_SKB_CB(skb)->validated))
return true;
+
if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
return false;
@@ -239,7 +303,7 @@ bool tipc_msg_validate(struct sk_buff **_skb)
if (unlikely(skb->len < msz))
return false;
- TIPC_SKB_CB(skb)->validated = true;
+ TIPC_SKB_CB(skb)->validated = 1;
return true;
}
@@ -419,48 +483,98 @@ error:
}
/**
- * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one
- * @skb: the buffer to append to ("bundle")
- * @msg: message to be appended
- * @mtu: max allowable size for the bundle buffer
- * Consumes buffer if successful
- * Returns true if bundling could be performed, otherwise false
+ * tipc_msg_bundle - Append contents of a buffer to tail of an existing one
+ * @bskb: the bundle buffer to append to
+ * @msg: message to be appended
+ * @max: max allowable size for the bundle buffer
+ *
+ * Returns "true" if bundling has been performed, otherwise "false"
*/
-bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu)
+static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg,
+ u32 max)
{
- struct tipc_msg *bmsg;
- unsigned int bsz;
- unsigned int msz = msg_size(msg);
- u32 start, pad;
- u32 max = mtu - INT_H_SIZE;
+ struct tipc_msg *bmsg = buf_msg(bskb);
+ u32 msz, bsz, offset, pad;
- if (likely(msg_user(msg) == MSG_FRAGMENTER))
- return false;
- if (!skb)
- return false;
- bmsg = buf_msg(skb);
+ msz = msg_size(msg);
bsz = msg_size(bmsg);
- start = align(bsz);
- pad = start - bsz;
+ offset = align(bsz);
+ pad = offset - bsz;
- if (unlikely(msg_user(msg) == TUNNEL_PROTOCOL))
+ if (unlikely(skb_tailroom(bskb) < (pad + msz)))
return false;
- if (unlikely(msg_user(msg) == BCAST_PROTOCOL))
+ if (unlikely(max < (offset + msz)))
return false;
- if (unlikely(msg_user(bmsg) != MSG_BUNDLER))
+
+ skb_put(bskb, pad + msz);
+ skb_copy_to_linear_data_offset(bskb, offset, msg, msz);
+ msg_set_size(bmsg, offset + msz);
+ msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
+ return true;
+}
+
+/**
+ * tipc_msg_try_bundle - Try to bundle a new message to the last one
+ * @tskb: the last/target message to which the new one will be appended
+ * @skb: the new message skb pointer
+ * @mss: max message size (header inclusive)
+ * @dnode: destination node for the message
+ * @new_bundle: if this call made a new bundle or not
+ *
+ * Return: "true" if the new message skb is potential for bundling this time or
+ * later, in the case a bundling has been done this time, the skb is consumed
+ * (the skb pointer = NULL).
+ * Otherwise, "false" if the skb cannot be bundled at all.
+ */
+bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
+ u32 dnode, bool *new_bundle)
+{
+ struct tipc_msg *msg, *inner, *outer;
+ u32 tsz;
+
+ /* First, check if the new buffer is suitable for bundling */
+ msg = buf_msg(*skb);
+ if (msg_user(msg) == MSG_FRAGMENTER)
return false;
- if (unlikely(skb_tailroom(skb) < (pad + msz)))
+ if (msg_user(msg) == TUNNEL_PROTOCOL)
return false;
- if (unlikely(max < (start + msz)))
+ if (msg_user(msg) == BCAST_PROTOCOL)
return false;
- if ((msg_importance(msg) < TIPC_SYSTEM_IMPORTANCE) &&
- (msg_importance(bmsg) == TIPC_SYSTEM_IMPORTANCE))
+ if (mss <= INT_H_SIZE + msg_size(msg))
return false;
- skb_put(skb, pad + msz);
- skb_copy_to_linear_data_offset(skb, start, msg, msz);
- msg_set_size(bmsg, start + msz);
- msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
+ /* Ok, but the last/target buffer can be empty? */
+ if (unlikely(!tskb))
+ return true;
+
+ /* Is it a bundle already? Try to bundle the new message to it */
+ if (msg_user(buf_msg(tskb)) == MSG_BUNDLER) {
+ *new_bundle = false;
+ goto bundle;
+ }
+
+ /* Make a new bundle of the two messages if possible */
+ tsz = msg_size(buf_msg(tskb));
+ if (unlikely(mss < align(INT_H_SIZE + tsz) + msg_size(msg)))
+ return true;
+ if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE,
+ GFP_ATOMIC)))
+ return true;
+ inner = buf_msg(tskb);
+ skb_push(tskb, INT_H_SIZE);
+ outer = buf_msg(tskb);
+ tipc_msg_init(msg_prevnode(inner), outer, MSG_BUNDLER, 0, INT_H_SIZE,
+ dnode);
+ msg_set_importance(outer, msg_importance(inner));
+ msg_set_size(outer, INT_H_SIZE + tsz);
+ msg_set_msgcnt(outer, 1);
+ *new_bundle = true;
+
+bundle:
+ if (likely(tipc_msg_bundle(tskb, msg, mss))) {
+ consume_skb(*skb);
+ *skb = NULL;
+ }
return true;
}
@@ -510,49 +624,6 @@ none:
}
/**
- * tipc_msg_make_bundle(): Create bundle buf and append message to its tail
- * @list: the buffer chain, where head is the buffer to replace/append
- * @skb: buffer to be created, appended to and returned in case of success
- * @msg: message to be appended
- * @mtu: max allowable size for the bundle buffer, inclusive header
- * @dnode: destination node for message. (Not always present in header)
- * Returns true if success, otherwise false
- */
-bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
- u32 mtu, u32 dnode)
-{
- struct sk_buff *_skb;
- struct tipc_msg *bmsg;
- u32 msz = msg_size(msg);
- u32 max = mtu - INT_H_SIZE;
-
- if (msg_user(msg) == MSG_FRAGMENTER)
- return false;
- if (msg_user(msg) == TUNNEL_PROTOCOL)
- return false;
- if (msg_user(msg) == BCAST_PROTOCOL)
- return false;
- if (msz > (max / 2))
- return false;
-
- _skb = tipc_buf_acquire(max, GFP_ATOMIC);
- if (!_skb)
- return false;
-
- skb_trim(_skb, INT_H_SIZE);
- bmsg = buf_msg(_skb);
- tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
- INT_H_SIZE, dnode);
- msg_set_importance(bmsg, msg_importance(msg));
- msg_set_seqno(bmsg, msg_seqno(msg));
- msg_set_ack(bmsg, msg_ack(msg));
- msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
- tipc_msg_bundle(_skb, msg, mtu);
- *skb = _skb;
- return true;
-}
-
-/**
* tipc_msg_reverse(): swap source and destination addresses and add error code
* @own_node: originating node id for reversed message
* @skb: buffer containing message to be reversed; will be consumed
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 0daa6f04ca81..6d466ebdb64f 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -102,16 +102,42 @@ struct plist;
#define TIPC_MEDIA_INFO_OFFSET 5
struct tipc_skb_cb {
- struct sk_buff *tail;
- unsigned long nxt_retr;
- unsigned long retr_stamp;
- u32 bytes_read;
- u32 orig_member;
- u16 chain_imp;
- u16 ackers;
- u16 retr_cnt;
- bool validated;
-};
+ union {
+ struct {
+ struct sk_buff *tail;
+ unsigned long nxt_retr;
+ unsigned long retr_stamp;
+ u32 bytes_read;
+ u32 orig_member;
+ u16 chain_imp;
+ u16 ackers;
+ u16 retr_cnt;
+ } __packed;
+#ifdef CONFIG_TIPC_CRYPTO
+ struct {
+ struct tipc_crypto *rx;
+ struct tipc_aead *last;
+ u8 recurs;
+ } tx_clone_ctx __packed;
+#endif
+ } __packed;
+ union {
+ struct {
+ u8 validated:1;
+#ifdef CONFIG_TIPC_CRYPTO
+ u8 encrypted:1;
+ u8 decrypted:1;
+ u8 probe:1;
+ u8 tx_clone_deferred:1;
+#endif
+ };
+ u8 flags;
+ };
+ u8 reserved;
+#ifdef CONFIG_TIPC_CRYPTO
+ void *crypto_ctx;
+#endif
+} __packed;
#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
@@ -290,6 +316,16 @@ static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d)
msg_set_bits(m, 0, 18, 1, d);
}
+static inline int msg_ack_required(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 18, 1);
+}
+
+static inline void msg_set_ack_required(struct tipc_msg *m, u32 d)
+{
+ msg_set_bits(m, 0, 18, 1, d);
+}
+
static inline bool msg_is_rcast(struct tipc_msg *m)
{
return msg_bits(m, 0, 18, 0x1);
@@ -1026,6 +1062,20 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
}
+/* Word 13
+ */
+static inline void msg_set_peer_net_hash(struct tipc_msg *m, u32 n)
+{
+ msg_set_word(m, 13, n);
+}
+
+static inline u32 msg_peer_net_hash(struct tipc_msg *m)
+{
+ return msg_word(m, 13);
+}
+
+/* Word 14
+ */
static inline u32 msg_sugg_node_addr(struct tipc_msg *m)
{
return msg_word(m, 14);
@@ -1057,14 +1107,15 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
uint data_sz, u32 dnode, u32 onode,
u32 dport, u32 oport, int errcode);
int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
-bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu);
-bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
- u32 mtu, u32 dnode);
+bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
+ u32 dnode, bool *new_bundle);
bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr,
int pktmax, struct sk_buff_head *frags);
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
+int tipc_msg_append(struct tipc_msg *hdr, struct msghdr *m, int dlen,
+ int mss, struct sk_buff_head *txq);
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
bool tipc_msg_assemble(struct sk_buff_head *list);
bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 836e629e8f4a..5feaf3b67380 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -146,7 +146,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
struct publication *publ;
struct sk_buff *skb = NULL;
struct distr_item *item = NULL;
- u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) /
+ u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) /
ITEM_SIZE) * ITEM_SIZE;
u32 msg_rem = msg_dsz;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index d6165ad384c0..e53231bd23b4 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -102,7 +102,11 @@ const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
[TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
[TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
- [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }
+ [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG },
+ [TIPC_NLA_NODE_ID] = { .type = NLA_BINARY,
+ .len = TIPC_NODEID_LEN},
+ [TIPC_NLA_NODE_KEY] = { .type = NLA_BINARY,
+ .len = TIPC_AEAD_KEY_SIZE_MAX},
};
/* Properties valid for media, bearer and link */
@@ -176,7 +180,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
},
{
.cmd = TIPC_NL_PUBL_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = tipc_nl_publ_dump,
},
{
@@ -239,7 +244,8 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
},
{
.cmd = TIPC_NL_MON_PEER_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = tipc_nl_node_dump_monitor_peer,
},
{
@@ -250,10 +256,23 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
#ifdef CONFIG_TIPC_MEDIA_UDP
{
.cmd = TIPC_NL_UDP_GET_REMOTEIP,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT |
+ GENL_DONT_VALIDATE_DUMP_STRICT,
.dumpit = tipc_udp_nl_dump_remoteip,
},
#endif
+#ifdef CONFIG_TIPC_CRYPTO
+ {
+ .cmd = TIPC_NL_KEY_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_node_set_key,
+ },
+ {
+ .cmd = TIPC_NL_KEY_FLUSH,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_node_flush_key,
+ },
+#endif
};
struct genl_family tipc_genl_family __ro_after_init = {
@@ -268,18 +287,6 @@ struct genl_family tipc_genl_family __ro_after_init = {
.n_ops = ARRAY_SIZE(tipc_genl_v2_ops),
};
-int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
-{
- u32 maxattr = tipc_genl_family.maxattr;
-
- *attr = genl_family_attrbuf(&tipc_genl_family);
- if (!*attr)
- return -EOPNOTSUPP;
-
- return nlmsg_parse_deprecated(nlh, GENL_HDRLEN, *attr, maxattr,
- tipc_nl_policy, NULL);
-}
-
int __init tipc_netlink_start(void)
{
int res;
diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h
index 4ba0ad422110..7cf777723e3e 100644
--- a/net/tipc/netlink.h
+++ b/net/tipc/netlink.h
@@ -38,7 +38,6 @@
#include <net/netlink.h>
extern struct genl_family tipc_genl_family;
-int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf);
struct tipc_nl_msg {
struct sk_buff *skb;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index e135d4e11231..17a529739f8d 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -181,15 +181,18 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
struct tipc_nl_compat_msg *msg,
struct sk_buff *arg)
{
+ struct genl_dumpit_info info;
int len = 0;
int err;
struct sk_buff *buf;
struct nlmsghdr *nlmsg;
struct netlink_callback cb;
+ struct nlattr **attrbuf;
memset(&cb, 0, sizeof(cb));
cb.nlh = (struct nlmsghdr *)arg->data;
cb.skb = arg;
+ cb.data = &info;
buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (!buf)
@@ -201,19 +204,35 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
return -ENOMEM;
}
+ attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1,
+ sizeof(struct nlattr *), GFP_KERNEL);
+ if (!attrbuf) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ info.attrs = attrbuf;
+ err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf,
+ tipc_genl_family.maxattr,
+ tipc_genl_family.policy, NULL);
+ if (err)
+ goto err_out;
+
do {
int rem;
len = (*cmd->dumpit)(buf, &cb);
nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) {
- struct nlattr **attrs;
-
- err = tipc_nlmsg_parse(nlmsg, &attrs);
+ err = nlmsg_parse_deprecated(nlmsg, GENL_HDRLEN,
+ attrbuf,
+ tipc_genl_family.maxattr,
+ tipc_genl_family.policy,
+ NULL);
if (err)
goto err_out;
- err = (*cmd->format)(msg, attrs);
+ err = (*cmd->format)(msg, attrbuf);
if (err)
goto err_out;
@@ -231,6 +250,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
err = 0;
err_out:
+ kfree(attrbuf);
tipc_dump_done(&cb);
kfree_skb(buf);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c8f6177dd5a2..aaf595613e6e 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -44,6 +44,7 @@
#include "discover.h"
#include "netlink.h"
#include "trace.h"
+#include "crypto.h"
#define INVALID_NODE_SIG 0x10000
#define NODE_CLEANUP_AFTER 300000
@@ -89,6 +90,7 @@ struct tipc_bclink_entry {
* @links: array containing references to all links to node
* @action_flags: bit mask of different types of node actions
* @state: connectivity state vs peer node
+ * @preliminary: a preliminary node or not
* @sync_point: sequence number where synch/failover is finished
* @list: links to adjacent nodes in sorted list of cluster's nodes
* @working_links: number of working links to node (both active and standby)
@@ -99,6 +101,7 @@ struct tipc_bclink_entry {
* @publ_list: list of publications
* @rcu: rcu struct for tipc_node
* @delete_at: indicates the time for deleting a down node
+ * @crypto_rx: RX crypto handler
*/
struct tipc_node {
u32 addr;
@@ -112,6 +115,7 @@ struct tipc_node {
int action_flags;
struct list_head list;
int state;
+ bool preliminary;
bool failover_sent;
u16 sync_point;
int link_cnt;
@@ -120,12 +124,18 @@ struct tipc_node {
u32 signature;
u32 link_id;
u8 peer_id[16];
+ char peer_id_string[NODE_ID_STR_LEN];
struct list_head publ_list;
struct list_head conn_sks;
unsigned long keepalive_intv;
struct timer_list timer;
struct rcu_head rcu;
unsigned long delete_at;
+ struct net *peer_net;
+ u32 peer_hash_mix;
+#ifdef CONFIG_TIPC_CRYPTO
+ struct tipc_crypto *crypto_rx;
+#endif
};
/* Node FSM states and events:
@@ -163,7 +173,6 @@ static void tipc_node_timeout(struct timer_list *t);
static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id);
-static void tipc_node_put(struct tipc_node *node);
static bool node_is_up(struct tipc_node *n);
static void tipc_node_delete_from_list(struct tipc_node *node);
@@ -184,7 +193,7 @@ static struct tipc_link *node_active_link(struct tipc_node *n, int sel)
return n->links[bearer_id].link;
}
-int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected)
{
struct tipc_node *n;
int bearer_id;
@@ -194,6 +203,14 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
if (unlikely(!n))
return mtu;
+ /* Allow MAX_MSG_SIZE when building connection oriented message
+ * if they are in the same core network
+ */
+ if (n->peer_net && connected) {
+ tipc_node_put(n);
+ return mtu;
+ }
+
bearer_id = n->active_links[sel & 1];
if (likely(bearer_id != INVALID_BEARER_ID))
mtu = n->links[bearer_id].mtu;
@@ -235,15 +252,51 @@ u16 tipc_node_get_capabilities(struct net *net, u32 addr)
return caps;
}
+u32 tipc_node_get_addr(struct tipc_node *node)
+{
+ return (node) ? node->addr : 0;
+}
+
+char *tipc_node_get_id_str(struct tipc_node *node)
+{
+ return node->peer_id_string;
+}
+
+#ifdef CONFIG_TIPC_CRYPTO
+/**
+ * tipc_node_crypto_rx - Retrieve crypto RX handle from node
+ * Note: node ref counter must be held first!
+ */
+struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n)
+{
+ return (__n) ? __n->crypto_rx : NULL;
+}
+
+struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos)
+{
+ return container_of(pos, struct tipc_node, list)->crypto_rx;
+}
+#endif
+
+void tipc_node_free(struct rcu_head *rp)
+{
+ struct tipc_node *n = container_of(rp, struct tipc_node, rcu);
+
+#ifdef CONFIG_TIPC_CRYPTO
+ tipc_crypto_stop(&n->crypto_rx);
+#endif
+ kfree(n);
+}
+
static void tipc_node_kref_release(struct kref *kref)
{
struct tipc_node *n = container_of(kref, struct tipc_node, kref);
kfree(n->bc_entry.link);
- kfree_rcu(n, rcu);
+ call_rcu(&n->rcu, tipc_node_free);
}
-static void tipc_node_put(struct tipc_node *node)
+void tipc_node_put(struct tipc_node *node)
{
kref_put(&node->kref, tipc_node_kref_release);
}
@@ -264,7 +317,7 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
rcu_read_lock();
hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
- if (node->addr != addr)
+ if (node->addr != addr || node->preliminary)
continue;
if (!kref_get_unless_zero(&node->kref))
node = NULL;
@@ -360,18 +413,71 @@ static void tipc_node_write_unlock(struct tipc_node *n)
}
}
-static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
- u8 *peer_id, u16 capabilities)
+static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes)
+{
+ int net_id = tipc_netid(n->net);
+ struct tipc_net *tn_peer;
+ struct net *tmp;
+ u32 hash_chk;
+
+ if (n->peer_net)
+ return;
+
+ for_each_net_rcu(tmp) {
+ tn_peer = tipc_net(tmp);
+ if (!tn_peer)
+ continue;
+ /* Integrity checking whether node exists in namespace or not */
+ if (tn_peer->net_id != net_id)
+ continue;
+ if (memcmp(n->peer_id, tn_peer->node_id, NODE_ID_LEN))
+ continue;
+ hash_chk = tipc_net_hash_mixes(tmp, tn_peer->random);
+ if (hash_mixes ^ hash_chk)
+ continue;
+ n->peer_net = tmp;
+ n->peer_hash_mix = hash_mixes;
+ break;
+ }
+}
+
+struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
+ u16 capabilities, u32 hash_mixes,
+ bool preliminary)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *n, *temp_node;
struct tipc_link *l;
+ unsigned long intv;
int bearer_id;
int i;
spin_lock_bh(&tn->node_list_lock);
- n = tipc_node_find(net, addr);
+ n = tipc_node_find(net, addr) ?:
+ tipc_node_find_by_id(net, peer_id);
if (n) {
+ if (!n->preliminary)
+ goto update;
+ if (preliminary)
+ goto exit;
+ /* A preliminary node becomes "real" now, refresh its data */
+ tipc_node_write_lock(n);
+ n->preliminary = false;
+ n->addr = addr;
+ hlist_del_rcu(&n->hash);
+ hlist_add_head_rcu(&n->hash,
+ &tn->node_htable[tipc_hashfn(addr)]);
+ list_del_rcu(&n->list);
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ if (n->addr < temp_node->addr)
+ break;
+ }
+ list_add_tail_rcu(&n->list, &temp_node->list);
+ tipc_node_write_unlock_fast(n);
+
+update:
+ if (n->peer_hash_mix ^ hash_mixes)
+ tipc_node_assign_peer_net(n, hash_mixes);
if (n->capabilities == capabilities)
goto exit;
/* Same node may come back with new capabilities */
@@ -389,6 +495,7 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
+
goto exit;
}
n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -396,9 +503,23 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
pr_warn("Node creation failed, no memory\n");
goto exit;
}
+ tipc_nodeid2string(n->peer_id_string, peer_id);
+#ifdef CONFIG_TIPC_CRYPTO
+ if (unlikely(tipc_crypto_start(&n->crypto_rx, net, n))) {
+ pr_warn("Failed to start crypto RX(%s)!\n", n->peer_id_string);
+ kfree(n);
+ n = NULL;
+ goto exit;
+ }
+#endif
n->addr = addr;
+ n->preliminary = preliminary;
memcpy(&n->peer_id, peer_id, 16);
n->net = net;
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
+ /* Assign kernel local namespace if exists */
+ tipc_node_assign_peer_net(n, hash_mixes);
n->capabilities = capabilities;
kref_init(&n->kref);
rwlock_init(&n->lock);
@@ -417,22 +538,14 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
n->signature = INVALID_NODE_SIG;
n->active_links[0] = INVALID_BEARER_ID;
n->active_links[1] = INVALID_BEARER_ID;
- if (!tipc_link_bc_create(net, tipc_own_addr(net),
- addr, U16_MAX,
- tipc_link_window(tipc_bc_sndlink(net)),
- n->capabilities,
- &n->bc_entry.inputq1,
- &n->bc_entry.namedq,
- tipc_bc_sndlink(net),
- &n->bc_entry.link)) {
- pr_warn("Broadcast rcv link creation failed, no memory\n");
- kfree(n);
- n = NULL;
- goto exit;
- }
+ n->bc_entry.link = NULL;
tipc_node_get(n);
timer_setup(&n->timer, tipc_node_timeout, 0);
- n->keepalive_intv = U32_MAX;
+ /* Start a slow timer anyway, crypto needs it */
+ n->keepalive_intv = 10000;
+ intv = jiffies + msecs_to_jiffies(n->keepalive_intv);
+ if (!mod_timer(&n->timer, intv))
+ tipc_node_get(n);
hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
if (n->addr < temp_node->addr)
@@ -617,6 +730,11 @@ static bool tipc_node_cleanup(struct tipc_node *peer)
}
tipc_node_write_unlock(peer);
+ if (!deleted) {
+ spin_unlock_bh(&tn->node_list_lock);
+ return deleted;
+ }
+
/* Calculate cluster capabilities */
tn->capabilities = TIPC_NODE_CAPABILITIES;
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
@@ -645,6 +763,10 @@ static void tipc_node_timeout(struct timer_list *t)
return;
}
+#ifdef CONFIG_TIPC_CRYPTO
+ /* Take any crypto key related actions first */
+ tipc_crypto_timeout(n->crypto_rx);
+#endif
__skb_queue_head_init(&xmitq);
/* Initial node interval to value larger (10 seconds), then it will be
@@ -665,7 +787,7 @@ static void tipc_node_timeout(struct timer_list *t)
remains--;
}
tipc_node_read_unlock(n);
- tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr);
+ tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr, n);
if (rc & TIPC_LINK_DOWN_EVT)
tipc_node_link_down(n, bearer_id, false);
}
@@ -697,7 +819,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
n->link_id = tipc_link_id(nl);
/* Leave room for tunnel header when returning 'mtu' to users: */
- n->links[bearer_id].mtu = tipc_link_mtu(nl) - INT_H_SIZE;
+ n->links[bearer_id].mtu = tipc_link_mss(nl);
tipc_bearer_add_dest(n->net, bearer_id, n->addr);
tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id);
@@ -751,7 +873,7 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
tipc_node_write_lock(n);
__tipc_node_link_up(n, bearer_id, xmitq);
maddr = &n->links[bearer_id].maddr;
- tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr);
+ tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr, n);
tipc_node_write_unlock(n);
}
@@ -906,7 +1028,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
if (delete)
tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
if (!skb_queue_empty(&xmitq))
- tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
+ tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr, n);
tipc_sk_rcv(n->net, &le->inputq);
}
@@ -950,6 +1072,8 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
{
struct tipc_net *tn = tipc_net(net);
struct tipc_node *n;
+ bool preliminary;
+ u32 sugg_addr;
/* Suggest new address if some other peer is using this one */
n = tipc_node_find(net, addr);
@@ -965,9 +1089,11 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
/* Suggest previously used address if peer is known */
n = tipc_node_find_by_id(net, id);
if (n) {
- addr = n->addr;
+ sugg_addr = n->addr;
+ preliminary = n->preliminary;
tipc_node_put(n);
- return addr;
+ if (!preliminary)
+ return sugg_addr;
}
/* Even this node may be in conflict */
@@ -979,12 +1105,12 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
void tipc_node_check_dest(struct net *net, u32 addr,
u8 *peer_id, struct tipc_bearer *b,
- u16 capabilities, u32 signature,
+ u16 capabilities, u32 signature, u32 hash_mixes,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr)
{
struct tipc_node *n;
- struct tipc_link *l;
+ struct tipc_link *l, *snd_l;
struct tipc_link_entry *le;
bool addr_match = false;
bool sign_match = false;
@@ -998,11 +1124,27 @@ void tipc_node_check_dest(struct net *net, u32 addr,
*dupl_addr = false;
*respond = false;
- n = tipc_node_create(net, addr, peer_id, capabilities);
+ n = tipc_node_create(net, addr, peer_id, capabilities, hash_mixes,
+ false);
if (!n)
return;
tipc_node_write_lock(n);
+ if (unlikely(!n->bc_entry.link)) {
+ snd_l = tipc_bc_sndlink(net);
+ if (!tipc_link_bc_create(net, tipc_own_addr(net),
+ addr, U16_MAX,
+ tipc_link_window(snd_l),
+ n->capabilities,
+ &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l,
+ &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link creation failed, no mem\n");
+ tipc_node_write_unlock_fast(n);
+ tipc_node_put(n);
+ return;
+ }
+ }
le = &n->links[b->identity];
@@ -1017,6 +1159,9 @@ void tipc_node_check_dest(struct net *net, u32 addr,
if (sign_match && addr_match && link_up) {
/* All is fine. Do nothing. */
reset = false;
+ /* Peer node is not a container/local namespace */
+ if (!n->peer_hash_mix)
+ n->peer_hash_mix = hash_mixes;
} else if (sign_match && addr_match && !link_up) {
/* Respond. The link will come up in due time */
*respond = true;
@@ -1342,7 +1487,8 @@ static void node_lost_contact(struct tipc_node *n,
/* Notify publications from this node */
n->action_flags |= TIPC_NOTIFY_NODE_DOWN;
-
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
/* Notify sockets connected to node */
list_for_each_entry_safe(conn, safe, conns, list) {
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
@@ -1424,6 +1570,56 @@ msg_full:
return -EMSGSIZE;
}
+static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list)
+{
+ struct tipc_msg *hdr = buf_msg(skb_peek(list));
+ struct sk_buff_head inputq;
+
+ switch (msg_user(hdr)) {
+ case TIPC_LOW_IMPORTANCE:
+ case TIPC_MEDIUM_IMPORTANCE:
+ case TIPC_HIGH_IMPORTANCE:
+ case TIPC_CRITICAL_IMPORTANCE:
+ if (msg_connected(hdr) || msg_named(hdr)) {
+ tipc_loopback_trace(peer_net, list);
+ spin_lock_init(&list->lock);
+ tipc_sk_rcv(peer_net, list);
+ return;
+ }
+ if (msg_mcast(hdr)) {
+ tipc_loopback_trace(peer_net, list);
+ skb_queue_head_init(&inputq);
+ tipc_sk_mcast_rcv(peer_net, list, &inputq);
+ __skb_queue_purge(list);
+ skb_queue_purge(&inputq);
+ return;
+ }
+ return;
+ case MSG_FRAGMENTER:
+ if (tipc_msg_assemble(list)) {
+ tipc_loopback_trace(peer_net, list);
+ skb_queue_head_init(&inputq);
+ tipc_sk_mcast_rcv(peer_net, list, &inputq);
+ __skb_queue_purge(list);
+ skb_queue_purge(&inputq);
+ }
+ return;
+ case GROUP_PROTOCOL:
+ case CONN_MANAGER:
+ tipc_loopback_trace(peer_net, list);
+ spin_lock_init(&list->lock);
+ tipc_sk_rcv(peer_net, list);
+ return;
+ case LINK_PROTOCOL:
+ case NAME_DISTRIBUTOR:
+ case TUNNEL_PROTOCOL:
+ case BCAST_PROTOCOL:
+ return;
+ default:
+ return;
+ };
+}
+
/**
* tipc_node_xmit() is the general link level function for message sending
* @net: the applicable net namespace
@@ -1439,6 +1635,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
struct tipc_link_entry *le = NULL;
struct tipc_node *n;
struct sk_buff_head xmitq;
+ bool node_up = false;
int bearer_id;
int rc;
@@ -1456,6 +1653,17 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
}
tipc_node_read_lock(n);
+ node_up = node_is_up(n);
+ if (node_up && n->peer_net && check_net(n->peer_net)) {
+ /* xmit inner linux container */
+ tipc_lxc_xmit(n->peer_net, list);
+ if (likely(skb_queue_empty(list))) {
+ tipc_node_read_unlock(n);
+ tipc_node_put(n);
+ return 0;
+ }
+ }
+
bearer_id = n->active_links[selector & 1];
if (unlikely(bearer_id == INVALID_BEARER_ID)) {
tipc_node_read_unlock(n);
@@ -1474,7 +1682,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
if (unlikely(rc == -ENOBUFS))
tipc_node_link_down(n, bearer_id, false);
else
- tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
tipc_node_put(n);
@@ -1622,7 +1830,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
}
if (!skb_queue_empty(&xmitq))
- tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
if (!skb_queue_empty(&be->inputq1))
tipc_node_mcast_rcv(n);
@@ -1800,20 +2008,38 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
{
struct sk_buff_head xmitq;
- struct tipc_node *n;
+ struct tipc_link_entry *le;
struct tipc_msg *hdr;
+ struct tipc_node *n;
int bearer_id = b->identity;
- struct tipc_link_entry *le;
u32 self = tipc_own_addr(net);
int usr, rc = 0;
u16 bc_ack;
+#ifdef CONFIG_TIPC_CRYPTO
+ struct tipc_ehdr *ehdr;
- __skb_queue_head_init(&xmitq);
+ /* Check if message must be decrypted first */
+ if (TIPC_SKB_CB(skb)->decrypted || !tipc_ehdr_validate(skb))
+ goto rcv;
+
+ ehdr = (struct tipc_ehdr *)skb->data;
+ if (likely(ehdr->user != LINK_CONFIG)) {
+ n = tipc_node_find(net, ntohl(ehdr->addr));
+ if (unlikely(!n))
+ goto discard;
+ } else {
+ n = tipc_node_find_by_id(net, ehdr->id);
+ }
+ tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b);
+ if (!skb)
+ return;
+rcv:
+#endif
/* Ensure message is well-formed before touching the header */
- TIPC_SKB_CB(skb)->validated = false;
if (unlikely(!tipc_msg_validate(&skb)))
goto discard;
+ __skb_queue_head_init(&xmitq);
hdr = buf_msg(skb);
usr = msg_user(hdr);
bc_ack = msg_bcast_ack(hdr);
@@ -1884,7 +2110,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
tipc_sk_rcv(net, &le->inputq);
if (!skb_queue_empty(&xmitq))
- tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n);
tipc_node_put(n);
discard:
@@ -1915,7 +2141,7 @@ void tipc_node_apply_property(struct net *net, struct tipc_bearer *b,
tipc_link_set_mtu(e->link, b->mtu);
}
tipc_node_write_unlock(n);
- tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL);
}
rcu_read_unlock();
@@ -1926,7 +2152,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
struct net *net = sock_net(skb->sk);
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
- struct tipc_node *peer;
+ struct tipc_node *peer, *temp_node;
u32 addr;
int err;
@@ -1967,6 +2193,11 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
tipc_node_write_unlock(peer);
tipc_node_delete(peer);
+ /* Calculate cluster capabilities */
+ tn->capabilities = TIPC_NODE_CAPABILITIES;
+ list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+ tn->capabilities &= temp_node->capabilities;
+ }
err = 0;
err_out:
tipc_node_put(peer);
@@ -2011,6 +2242,8 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
list_for_each_entry_rcu(node, &tn->node_list, list) {
+ if (node->preliminary)
+ continue;
if (last_addr) {
if (node->addr == last_addr)
last_addr = 0;
@@ -2150,7 +2383,8 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
out:
tipc_node_read_unlock(node);
- tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr);
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr,
+ NULL);
return res;
}
@@ -2484,13 +2718,9 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
int err;
if (!prev_node) {
- struct nlattr **attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
struct nlattr *mon[TIPC_NLA_MON_MAX + 1];
- err = tipc_nlmsg_parse(cb->nlh, &attrs);
- if (err)
- return err;
-
if (!attrs[TIPC_NLA_MON])
return -EINVAL;
@@ -2530,11 +2760,141 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
return skb->len;
}
-u32 tipc_node_get_addr(struct tipc_node *node)
+#ifdef CONFIG_TIPC_CRYPTO
+static int tipc_nl_retrieve_key(struct nlattr **attrs,
+ struct tipc_aead_key **key)
{
- return (node) ? node->addr : 0;
+ struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY];
+
+ if (!attr)
+ return -ENODATA;
+
+ *key = (struct tipc_aead_key *)nla_data(attr);
+ if (nla_len(attr) < tipc_aead_key_size(*key))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id)
+{
+ struct nlattr *attr = attrs[TIPC_NLA_NODE_ID];
+
+ if (!attr)
+ return -ENODATA;
+
+ if (nla_len(attr) < TIPC_NODEID_LEN)
+ return -EINVAL;
+
+ *node_id = (u8 *)nla_data(attr);
+ return 0;
}
+int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_node *n = NULL;
+ struct tipc_aead_key *ukey;
+ struct tipc_crypto *c;
+ u8 *id, *own_id;
+ int rc = 0;
+
+ if (!info->attrs[TIPC_NLA_NODE])
+ return -EINVAL;
+
+ rc = nla_parse_nested(attrs, TIPC_NLA_NODE_MAX,
+ info->attrs[TIPC_NLA_NODE],
+ tipc_nl_node_policy, info->extack);
+ if (rc)
+ goto exit;
+
+ own_id = tipc_own_id(net);
+ if (!own_id) {
+ rc = -EPERM;
+ goto exit;
+ }
+
+ rc = tipc_nl_retrieve_key(attrs, &ukey);
+ if (rc)
+ goto exit;
+
+ rc = tipc_aead_key_validate(ukey);
+ if (rc)
+ goto exit;
+
+ rc = tipc_nl_retrieve_nodeid(attrs, &id);
+ switch (rc) {
+ case -ENODATA:
+ /* Cluster key mode */
+ rc = tipc_crypto_key_init(tn->crypto_tx, ukey, CLUSTER_KEY);
+ break;
+ case 0:
+ /* Per-node key mode */
+ if (!memcmp(id, own_id, NODE_ID_LEN)) {
+ c = tn->crypto_tx;
+ } else {
+ n = tipc_node_find_by_id(net, id) ?:
+ tipc_node_create(net, 0, id, 0xffffu, 0, true);
+ if (unlikely(!n)) {
+ rc = -ENOMEM;
+ break;
+ }
+ c = n->crypto_rx;
+ }
+
+ rc = tipc_crypto_key_init(c, ukey, PER_NODE_KEY);
+ if (n)
+ tipc_node_put(n);
+ break;
+ default:
+ break;
+ }
+
+exit:
+ return (rc < 0) ? rc : 0;
+}
+
+int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_node_set_key(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
+
+int __tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
+ struct tipc_node *n;
+
+ tipc_crypto_key_flush(tn->crypto_tx);
+ rcu_read_lock();
+ list_for_each_entry_rcu(n, &tn->node_list, list)
+ tipc_crypto_key_flush(n->crypto_rx);
+ rcu_read_unlock();
+
+ pr_info("All keys are flushed!\n");
+ return 0;
+}
+
+int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tipc_nl_node_flush_key(skb, info);
+ rtnl_unlock();
+
+ return err;
+}
+#endif
+
/**
* tipc_node_dump - dump TIPC node data
* @n: tipc node to be dumped
@@ -2591,3 +2951,33 @@ int tipc_node_dump(struct tipc_node *n, bool more, char *buf)
return i;
}
+
+void tipc_node_pre_cleanup_net(struct net *exit_net)
+{
+ struct tipc_node *n;
+ struct tipc_net *tn;
+ struct net *tmp;
+
+ rcu_read_lock();
+ for_each_net_rcu(tmp) {
+ if (tmp == exit_net)
+ continue;
+ tn = tipc_net(tmp);
+ if (!tn)
+ continue;
+ spin_lock_bh(&tn->node_list_lock);
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ if (!n->peer_net)
+ continue;
+ if (n->peer_net != exit_net)
+ continue;
+ tipc_node_write_lock(n);
+ n->peer_net = NULL;
+ n->peer_hash_mix = 0;
+ tipc_node_write_unlock_fast(n);
+ break;
+ }
+ spin_unlock_bh(&tn->node_list_lock);
+ }
+ rcu_read_unlock();
+}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 291d0ecd4101..a6803b449a2c 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -54,7 +54,8 @@ enum {
TIPC_LINK_PROTO_SEQNO = (1 << 6),
TIPC_MCAST_RBCTL = (1 << 7),
TIPC_GAP_ACK_BLOCK = (1 << 8),
- TIPC_TUNNEL_ENHANCED = (1 << 9)
+ TIPC_TUNNEL_ENHANCED = (1 << 9),
+ TIPC_NAGLE = (1 << 10)
};
#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \
@@ -66,16 +67,27 @@ enum {
TIPC_LINK_PROTO_SEQNO | \
TIPC_MCAST_RBCTL | \
TIPC_GAP_ACK_BLOCK | \
- TIPC_TUNNEL_ENHANCED)
+ TIPC_TUNNEL_ENHANCED | \
+ TIPC_NAGLE)
+
#define INVALID_BEARER_ID -1
void tipc_node_stop(struct net *net);
bool tipc_node_get_id(struct net *net, u32 addr, u8 *id);
u32 tipc_node_get_addr(struct tipc_node *node);
+char *tipc_node_get_id_str(struct tipc_node *node);
+void tipc_node_put(struct tipc_node *node);
+struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
+ u16 capabilities, u32 hash_mixes,
+ bool preliminary);
+#ifdef CONFIG_TIPC_CRYPTO
+struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n);
+struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos);
+#endif
u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
struct tipc_bearer *bearer,
- u16 capabilities, u32 signature,
+ u16 capabilities, u32 signature, u32 hash_mixes,
struct tipc_media_addr *maddr,
bool *respond, bool *dupl_addr);
void tipc_node_delete_links(struct net *net, int bearer_id);
@@ -92,7 +104,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr);
void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
-int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected);
bool tipc_node_is_up(struct net *net, u32 addr);
u16 tipc_node_get_capabilities(struct net *net, u32 addr);
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
@@ -107,4 +119,9 @@ int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
struct netlink_callback *cb);
+#ifdef CONFIG_TIPC_CRYPTO
+int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info);
+#endif
+void tipc_node_pre_cleanup_net(struct net *exit_net);
#endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 4b92b196cfa6..5d7859aac78e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -75,6 +75,7 @@ struct sockaddr_pair {
* @conn_instance: TIPC instance used when connection was established
* @published: non-zero if port has one or more associated names
* @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @maxnagle: maximum size of msg which can be subject to nagle
* @portid: unique port identity in TIPC socket hash table
* @phdr: preformatted message header used when sending messages
* #cong_links: list of congested links
@@ -97,6 +98,7 @@ struct tipc_sock {
u32 conn_instance;
int published;
u32 max_pkt;
+ u32 maxnagle;
u32 portid;
struct tipc_msg phdr;
struct list_head cong_links;
@@ -116,6 +118,10 @@ struct tipc_sock {
struct tipc_mc_method mc_method;
struct rcu_head rcu;
struct tipc_group *group;
+ u32 oneway;
+ u16 snd_backlog;
+ bool expect_ack;
+ bool nodelay;
bool group_is_open;
};
@@ -137,6 +143,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
+static void tipc_sk_push_backlog(struct tipc_sock *tsk);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -227,6 +234,26 @@ static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
return 1;
}
+/* tsk_set_nagle - enable/disable nagle property by manipulating maxnagle
+ */
+static void tsk_set_nagle(struct tipc_sock *tsk)
+{
+ struct sock *sk = &tsk->sk;
+
+ tsk->maxnagle = 0;
+ if (sk->sk_type != SOCK_STREAM)
+ return;
+ if (tsk->nodelay)
+ return;
+ if (!(tsk->peer_caps & TIPC_NAGLE))
+ return;
+ /* Limit node local buffer size to avoid receive queue overflow */
+ if (tsk->max_pkt == MAX_MSG_SIZE)
+ tsk->maxnagle = 1500;
+ else
+ tsk->maxnagle = tsk->max_pkt;
+}
+
/**
* tsk_advance_rx_queue - discard first buffer in socket receive queue
*
@@ -446,6 +473,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
tsk = tipc_sk(sk);
tsk->max_pkt = MAX_PKT_DEFAULT;
+ tsk->maxnagle = 0;
INIT_LIST_HEAD(&tsk->publications);
INIT_LIST_HEAD(&tsk->cong_links);
msg = &tsk->phdr;
@@ -512,8 +540,12 @@ static void __tipc_shutdown(struct socket *sock, int error)
tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
!tsk_conn_cong(tsk)));
- /* Remove any pending SYN message */
- __skb_queue_purge(&sk->sk_write_queue);
+ /* Push out unsent messages or remove if pending SYN */
+ skb = skb_peek(&sk->sk_write_queue);
+ if (skb && !msg_is_syn(buf_msg(skb)))
+ tipc_sk_push_backlog(tsk);
+ else
+ __skb_queue_purge(&sk->sk_write_queue);
/* Reject all unreceived messages, except on an active connection
* (which disconnects locally & sends a 'FIN+' to peer).
@@ -854,7 +886,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
/* Build message as chain of buffers */
__skb_queue_head_init(&pkts);
- mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
@@ -1208,6 +1240,27 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
tipc_sk_rcv(net, inputq);
}
+/* tipc_sk_push_backlog(): send accumulated buffers in socket write queue
+ * when socket is in Nagle mode
+ */
+static void tipc_sk_push_backlog(struct tipc_sock *tsk)
+{
+ struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
+ struct net *net = sock_net(&tsk->sk);
+ u32 dnode = tsk_peer_node(tsk);
+ int rc;
+
+ if (skb_queue_empty(txq) || tsk->cong_link_cnt)
+ return;
+
+ tsk->snt_unacked += tsk->snd_backlog;
+ tsk->snd_backlog = 0;
+ tsk->expect_ack = true;
+ rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
+ if (rc == -ELINKCONG)
+ tsk->cong_link_cnt = 1;
+}
+
/**
* tipc_sk_conn_proto_rcv - receive a connection mng protocol message
* @tsk: receiving socket
@@ -1221,7 +1274,7 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
u32 onode = tsk_own_node(tsk);
struct sock *sk = &tsk->sk;
int mtyp = msg_type(hdr);
- bool conn_cong;
+ bool was_cong;
/* Ignore if connection cannot be validated: */
if (!tsk_peer_msg(tsk, hdr)) {
@@ -1254,11 +1307,13 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
__skb_queue_tail(xmitq, skb);
return;
} else if (mtyp == CONN_ACK) {
- conn_cong = tsk_conn_cong(tsk);
+ was_cong = tsk_conn_cong(tsk);
+ tsk->expect_ack = false;
+ tipc_sk_push_backlog(tsk);
tsk->snt_unacked -= msg_conn_ack(hdr);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
tsk->snd_win = msg_adv_win(hdr);
- if (conn_cong)
+ if (was_cong && !tsk_conn_cong(tsk))
sk->sk_write_space(sk);
} else if (mtyp != CONN_PROBE_REPLY) {
pr_warn("Received unknown CONN_PROTO msg\n");
@@ -1388,7 +1443,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
return rc;
__skb_queue_head_init(&pkts);
- mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
@@ -1437,15 +1492,15 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
struct sock *sk = sock->sk;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+ struct sk_buff_head *txq = &sk->sk_write_queue;
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
- struct sk_buff_head pkts;
u32 dnode = tsk_peer_node(tsk);
+ int maxnagle = tsk->maxnagle;
+ int maxpkt = tsk->max_pkt;
int send, sent = 0;
- int rc = 0;
-
- __skb_queue_head_init(&pkts);
+ int blocks, rc = 0;
if (unlikely(dlen > INT_MAX))
return -EMSGSIZE;
@@ -1467,21 +1522,35 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
tipc_sk_connected(sk)));
if (unlikely(rc))
break;
-
send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
- rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
- if (unlikely(rc != send))
- break;
-
- trace_tipc_sk_sendstream(sk, skb_peek(&pkts),
+ blocks = tsk->snd_backlog;
+ if (tsk->oneway++ >= 4 && send <= maxnagle) {
+ rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
+ if (unlikely(rc < 0))
+ break;
+ blocks += rc;
+ if (blocks <= 64 && tsk->expect_ack) {
+ tsk->snd_backlog = blocks;
+ sent += send;
+ break;
+ }
+ tsk->expect_ack = true;
+ } else {
+ rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq);
+ if (unlikely(rc != send))
+ break;
+ blocks += tsk_inc(tsk, send + MIN_H_SIZE);
+ }
+ trace_tipc_sk_sendstream(sk, skb_peek(txq),
TIPC_DUMP_SK_SNDQ, " ");
- rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+ rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
if (unlikely(rc == -ELINKCONG)) {
tsk->cong_link_cnt = 1;
rc = 0;
}
if (likely(!rc)) {
- tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
+ tsk->snt_unacked += blocks;
+ tsk->snd_backlog = 0;
sent += send;
}
} while (sent < dlen && !rc);
@@ -1526,8 +1595,9 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
tipc_set_sk_state(sk, TIPC_ESTABLISHED);
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
- tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
+ tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid, true);
tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
+ tsk_set_nagle(tsk);
__skb_queue_purge(&sk->sk_write_queue);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
return;
@@ -1848,6 +1918,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
bool peek = flags & MSG_PEEK;
int offset, required, copy, copied = 0;
int hlen, dlen, err, rc;
+ bool ack = false;
long timeout;
/* Catch invalid receive attempts */
@@ -1892,6 +1963,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Copy data if msg ok, otherwise return error/partial data */
if (likely(!err)) {
+ ack = msg_ack_required(hdr);
offset = skb_cb->bytes_read;
copy = min_t(int, dlen - offset, buflen - copied);
rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
@@ -1919,7 +1991,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Send connection flow control advertisement when applicable */
tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
- if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE))
+ if (ack || tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
tipc_sk_send_ack(tsk);
/* Exit if all requested data or FIN/error received */
@@ -1990,6 +2062,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
smp_wmb();
tsk->cong_link_cnt--;
wakeup = true;
+ tipc_sk_push_backlog(tsk);
break;
case GROUP_PROTOCOL:
tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
@@ -2029,6 +2102,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
if (unlikely(msg_mcast(hdr)))
return false;
+ tsk->oneway = 0;
switch (sk->sk_state) {
case TIPC_CONNECTING:
@@ -2074,6 +2148,8 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
return true;
return false;
case TIPC_ESTABLISHED:
+ if (!skb_queue_empty(&sk->sk_write_queue))
+ tipc_sk_push_backlog(tsk);
/* Accept only connection-based messages sent by peer */
if (likely(con_msg && !err && pport == oport && pnode == onode))
return true;
@@ -2959,6 +3035,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
case TIPC_SRC_DROPPABLE:
case TIPC_DEST_DROPPABLE:
case TIPC_CONN_TIMEOUT:
+ case TIPC_NODELAY:
if (ol < sizeof(value))
return -EINVAL;
if (get_user(value, (u32 __user *)ov))
@@ -3007,6 +3084,10 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
case TIPC_GROUP_LEAVE:
res = tipc_sk_leave(tsk);
break;
+ case TIPC_NODELAY:
+ tsk->nodelay = !!value;
+ tsk_set_nagle(tsk);
+ break;
default:
res = -EINVAL;
}
@@ -3588,13 +3669,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct tipc_sock *tsk;
if (!tsk_portid) {
- struct nlattr **attrs;
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1];
- err = tipc_nlmsg_parse(cb->nlh, &attrs);
- if (err)
- return err;
-
if (!attrs[TIPC_NLA_SOCK])
return -EINVAL;
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 6159d327db76..58ab3d6dcdce 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -35,6 +35,7 @@
#include "core.h"
#include "trace.h"
+#include "crypto.h"
#include <linux/sysctl.h>
@@ -64,6 +65,16 @@ static struct ctl_table tipc_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
+#ifdef CONFIG_TIPC_CRYPTO
+ {
+ .procname = "max_tfms",
+ .data = &sysctl_tipc_max_tfms,
+ .maxlen = sizeof(sysctl_tipc_max_tfms),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+#endif
{}
};
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 287df68721df..86aaa4d3e781 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -372,6 +372,7 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
goto out;
if (b && test_bit(0, &b->up)) {
+ TIPC_SKB_CB(skb)->flags = 0;
tipc_rcv(sock_net(sk), skb, b);
return 0;
}
@@ -448,15 +449,11 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
int i;
if (!bid && !skip_cnt) {
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
struct net *net = sock_net(skb->sk);
struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1];
- struct nlattr **attrs;
char *bname;
- err = tipc_nlmsg_parse(cb->nlh, &attrs);
- if (err)
- return err;
-
if (!attrs[TIPC_NLA_BEARER])
return -EINVAL;
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index e4328b3b72eb..61ec78521a60 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -26,3 +26,13 @@ config TLS_DEVICE
Enable kernel support for HW offload of the TLS protocol.
If unsure, say N.
+
+config TLS_TOE
+ bool "Transport Layer Security TCP stack bypass"
+ depends on TLS
+ default n
+ help
+ Enable kernel support for legacy HW offload of the TLS protocol,
+ which is incompatible with the Linux networking stack semantics.
+
+ If unsure, say N.
diff --git a/net/tls/Makefile b/net/tls/Makefile
index ef0dc74ce8f9..f1ffbfe8968d 100644
--- a/net/tls/Makefile
+++ b/net/tls/Makefile
@@ -3,8 +3,11 @@
# Makefile for the TLS subsystem.
#
+CFLAGS_trace.o := -I$(src)
+
obj-$(CONFIG_TLS) += tls.o
-tls-y := tls_main.o tls_sw.o
+tls-y := tls_main.o tls_sw.o tls_proc.o trace.o
+tls-$(CONFIG_TLS_TOE) += tls_toe.o
tls-$(CONFIG_TLS_DEVICE) += tls_device.o tls_device_fallback.o
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 683d00837693..0683788bbef0 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -38,6 +38,8 @@
#include <net/tcp.h>
#include <net/tls.h>
+#include "trace.h"
+
/* device_offload_lock is used to synchronize tls_dev_add
* against NETDEV_DOWN notifications.
*/
@@ -202,6 +204,15 @@ void tls_device_free_resources_tx(struct sock *sk)
tls_free_partial_record(sk, tls_ctx);
}
+void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+
+ trace_tls_device_tx_resync_req(sk, got_seq, exp_seq);
+ WARN_ON(test_and_set_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags));
+}
+EXPORT_SYMBOL_GPL(tls_offload_tx_resync_request);
+
static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx,
u32 seq)
{
@@ -216,6 +227,7 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx,
rcd_sn = tls_ctx->tx.rec_seq;
+ trace_tls_device_tx_resync_send(sk, seq, rcd_sn);
down_read(&device_offload_lock);
netdev = tls_ctx->netdev;
if (netdev)
@@ -419,7 +431,7 @@ static int tls_push_data(struct sock *sk,
~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST))
return -ENOTSUPP;
- if (sk->sk_err)
+ if (unlikely(sk->sk_err))
return -sk->sk_err;
flags |= MSG_SENDPAGE_DECRYPTED;
@@ -440,9 +452,8 @@ static int tls_push_data(struct sock *sk,
max_open_record_len = TLS_MAX_PAYLOAD_SIZE +
prot->prepend_size;
do {
- rc = tls_do_allocation(sk, ctx, pfrag,
- prot->prepend_size);
- if (rc) {
+ rc = tls_do_allocation(sk, ctx, pfrag, prot->prepend_size);
+ if (unlikely(rc)) {
rc = sk_stream_wait_memory(sk, &timeo);
if (!rc)
continue;
@@ -645,15 +656,19 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
static void tls_device_resync_rx(struct tls_context *tls_ctx,
struct sock *sk, u32 seq, u8 *rcd_sn)
{
+ struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
struct net_device *netdev;
if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags)))
return;
+
+ trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type);
netdev = READ_ONCE(tls_ctx->netdev);
if (netdev)
netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn,
TLS_OFFLOAD_CTX_DIR_RX);
clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC);
}
void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
@@ -661,8 +676,8 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_rx *rx_ctx;
u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE];
+ u32 sock_data, is_req_pending;
struct tls_prot_info *prot;
- u32 is_req_pending;
s64 resync_req;
u32 req_seq;
@@ -691,8 +706,12 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
/* head of next rec is already in, note that the sock_inq will
* include the currently parsed message when called from parser
*/
- if (tcp_inq(sk) > rcd_len)
+ sock_data = tcp_inq(sk);
+ if (sock_data > rcd_len) {
+ trace_tls_device_rx_resync_nh_delay(sk, sock_data,
+ rcd_len);
return;
+ }
rx_ctx->resync_nh_do_now = 0;
seq += rcd_len;
@@ -736,6 +755,7 @@ static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx,
/* head of next rec is already in, parser will sync for us */
if (tcp_inq(sk) > rxm->full_len) {
+ trace_tls_device_rx_resync_nh_schedule(sk);
ctx->resync_nh_do_now = 1;
} else {
struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -834,9 +854,9 @@ free_buf:
return err;
}
-int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
+int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
+ struct sk_buff *skb, struct strp_msg *rxm)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx);
int is_decrypted = skb->decrypted;
int is_encrypted = !is_decrypted;
@@ -848,6 +868,10 @@ int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
is_encrypted &= !skb_iter->decrypted;
}
+ trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len,
+ tls_ctx->rx.rec_seq, rxm->full_len,
+ is_encrypted, is_decrypted);
+
ctx->sw.decrypted |= is_decrypted;
/* Return immediately if the record is either entirely plaintext or
@@ -1021,6 +1045,8 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX,
&ctx->crypto_send.info,
tcp_sk(sk)->write_seq);
+ trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_TX,
+ tcp_sk(sk)->write_seq, rec_seq, rc);
if (rc)
goto release_lock;
@@ -1057,6 +1083,7 @@ free_marker_record:
int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
{
+ struct tls12_crypto_info_aes_gcm_128 *info;
struct tls_offload_context_rx *context;
struct net_device *netdev;
int rc = 0;
@@ -1104,6 +1131,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX,
&ctx->crypto_recv.info,
tcp_sk(sk)->copied_seq);
+ info = (void *)&ctx->crypto_recv.info;
+ trace_tls_device_offload_set(sk, TLS_OFFLOAD_CTX_DIR_RX,
+ tcp_sk(sk)->copied_seq, info->rec_seq, rc);
if (rc)
goto free_sw_resources;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 0775ae40fcfb..5bb93dd5762b 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -41,7 +41,9 @@
#include <linux/inetdevice.h>
#include <linux/inet_diag.h>
+#include <net/snmp.h>
#include <net/tls.h>
+#include <net/tls_toe.h>
MODULE_AUTHOR("Mellanox Technologies");
MODULE_DESCRIPTION("Transport Layer Security Support");
@@ -58,14 +60,12 @@ static struct proto *saved_tcpv6_prot;
static DEFINE_MUTEX(tcpv6_prot_mutex);
static struct proto *saved_tcpv4_prot;
static DEFINE_MUTEX(tcpv4_prot_mutex);
-static LIST_HEAD(device_list);
-static DEFINE_SPINLOCK(device_spinlock);
static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
static struct proto_ops tls_sw_proto_ops;
static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
struct proto *base);
-static void update_sk_prot(struct sock *sk, struct tls_context *ctx)
+void update_sk_prot(struct sock *sk, struct tls_context *ctx)
{
int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
@@ -287,14 +287,19 @@ static void tls_sk_proto_cleanup(struct sock *sk,
kfree(ctx->tx.rec_seq);
kfree(ctx->tx.iv);
tls_sw_release_resources_tx(sk);
+ TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW);
} else if (ctx->tx_conf == TLS_HW) {
tls_device_free_resources_tx(sk);
+ TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE);
}
- if (ctx->rx_conf == TLS_SW)
+ if (ctx->rx_conf == TLS_SW) {
tls_sw_release_resources_rx(sk);
- else if (ctx->rx_conf == TLS_HW)
+ TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW);
+ } else if (ctx->rx_conf == TLS_HW) {
tls_device_offload_cleanup_rx(sk);
+ TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE);
+ }
}
static void tls_sk_proto_close(struct sock *sk, long timeout)
@@ -535,19 +540,29 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
if (tx) {
rc = tls_set_device_offload(sk, ctx);
conf = TLS_HW;
- if (rc) {
+ if (!rc) {
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE);
+ } else {
rc = tls_set_sw_offload(sk, ctx, 1);
if (rc)
goto err_crypto_info;
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW);
conf = TLS_SW;
}
} else {
rc = tls_set_device_offload_rx(sk, ctx);
conf = TLS_HW;
- if (rc) {
+ if (!rc) {
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE);
+ } else {
rc = tls_set_sw_offload(sk, ctx, 0);
if (rc)
goto err_crypto_info;
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW);
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW);
conf = TLS_SW;
}
tls_sw_strparser_arm(sk, ctx);
@@ -604,7 +619,7 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
return do_tls_setsockopt(sk, optname, optval, optlen);
}
-static struct tls_context *create_ctx(struct sock *sk)
+struct tls_context *tls_ctx_create(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tls_context *ctx;
@@ -645,90 +660,6 @@ static void tls_build_proto(struct sock *sk)
}
}
-static void tls_hw_sk_destruct(struct sock *sk)
-{
- struct tls_context *ctx = tls_get_ctx(sk);
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- ctx->sk_destruct(sk);
- /* Free ctx */
- rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
- tls_ctx_free(sk, ctx);
-}
-
-static int tls_hw_prot(struct sock *sk)
-{
- struct tls_context *ctx;
- struct tls_device *dev;
- int rc = 0;
-
- spin_lock_bh(&device_spinlock);
- list_for_each_entry(dev, &device_list, dev_list) {
- if (dev->feature && dev->feature(dev)) {
- ctx = create_ctx(sk);
- if (!ctx)
- goto out;
-
- spin_unlock_bh(&device_spinlock);
- tls_build_proto(sk);
- ctx->sk_destruct = sk->sk_destruct;
- sk->sk_destruct = tls_hw_sk_destruct;
- ctx->rx_conf = TLS_HW_RECORD;
- ctx->tx_conf = TLS_HW_RECORD;
- update_sk_prot(sk, ctx);
- spin_lock_bh(&device_spinlock);
- rc = 1;
- break;
- }
- }
-out:
- spin_unlock_bh(&device_spinlock);
- return rc;
-}
-
-static void tls_hw_unhash(struct sock *sk)
-{
- struct tls_context *ctx = tls_get_ctx(sk);
- struct tls_device *dev;
-
- spin_lock_bh(&device_spinlock);
- list_for_each_entry(dev, &device_list, dev_list) {
- if (dev->unhash) {
- kref_get(&dev->kref);
- spin_unlock_bh(&device_spinlock);
- dev->unhash(dev, sk);
- kref_put(&dev->kref, dev->release);
- spin_lock_bh(&device_spinlock);
- }
- }
- spin_unlock_bh(&device_spinlock);
- ctx->sk_proto->unhash(sk);
-}
-
-static int tls_hw_hash(struct sock *sk)
-{
- struct tls_context *ctx = tls_get_ctx(sk);
- struct tls_device *dev;
- int err;
-
- err = ctx->sk_proto->hash(sk);
- spin_lock_bh(&device_spinlock);
- list_for_each_entry(dev, &device_list, dev_list) {
- if (dev->hash) {
- kref_get(&dev->kref);
- spin_unlock_bh(&device_spinlock);
- err |= dev->hash(dev, sk);
- kref_put(&dev->kref, dev->release);
- spin_lock_bh(&device_spinlock);
- }
- }
- spin_unlock_bh(&device_spinlock);
-
- if (err)
- tls_hw_unhash(sk);
- return err;
-}
-
static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
struct proto *base)
{
@@ -766,10 +697,11 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW];
#endif
-
+#ifdef CONFIG_TLS_TOE
prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
- prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_hw_hash;
- prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_hw_unhash;
+ prot[TLS_HW_RECORD][TLS_HW_RECORD].hash = tls_toe_hash;
+ prot[TLS_HW_RECORD][TLS_HW_RECORD].unhash = tls_toe_unhash;
+#endif
}
static int tls_init(struct sock *sk)
@@ -777,8 +709,12 @@ static int tls_init(struct sock *sk)
struct tls_context *ctx;
int rc = 0;
- if (tls_hw_prot(sk))
+ tls_build_proto(sk);
+
+#ifdef CONFIG_TLS_TOE
+ if (tls_toe_bypass(sk))
return 0;
+#endif
/* The TLS ulp is currently supported only for TCP sockets
* in ESTABLISHED state.
@@ -789,11 +725,9 @@ static int tls_init(struct sock *sk)
if (sk->sk_state != TCP_ESTABLISHED)
return -ENOTSUPP;
- tls_build_proto(sk);
-
/* allocate tls context */
write_lock_bh(&sk->sk_callback_lock);
- ctx = create_ctx(sk);
+ ctx = tls_ctx_create(sk);
if (!ctx) {
rc = -ENOMEM;
goto out;
@@ -879,21 +813,34 @@ static size_t tls_get_info_size(const struct sock *sk)
return size;
}
-void tls_register_device(struct tls_device *device)
+static int __net_init tls_init_net(struct net *net)
{
- spin_lock_bh(&device_spinlock);
- list_add_tail(&device->dev_list, &device_list);
- spin_unlock_bh(&device_spinlock);
+ int err;
+
+ net->mib.tls_statistics = alloc_percpu(struct linux_tls_mib);
+ if (!net->mib.tls_statistics)
+ return -ENOMEM;
+
+ err = tls_proc_init(net);
+ if (err)
+ goto err_free_stats;
+
+ return 0;
+err_free_stats:
+ free_percpu(net->mib.tls_statistics);
+ return err;
}
-EXPORT_SYMBOL(tls_register_device);
-void tls_unregister_device(struct tls_device *device)
+static void __net_exit tls_exit_net(struct net *net)
{
- spin_lock_bh(&device_spinlock);
- list_del(&device->dev_list);
- spin_unlock_bh(&device_spinlock);
+ tls_proc_fini(net);
+ free_percpu(net->mib.tls_statistics);
}
-EXPORT_SYMBOL(tls_unregister_device);
+
+static struct pernet_operations tls_proc_ops = {
+ .init = tls_init_net,
+ .exit = tls_exit_net,
+};
static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
.name = "tls",
@@ -906,6 +853,12 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
static int __init tls_register(void)
{
+ int err;
+
+ err = register_pernet_subsys(&tls_proc_ops);
+ if (err)
+ return err;
+
tls_sw_proto_ops = inet_stream_ops;
tls_sw_proto_ops.splice_read = tls_sw_splice_read;
@@ -919,6 +872,7 @@ static void __exit tls_unregister(void)
{
tcp_unregister_ulp(&tcp_tls_ulp_ops);
tls_device_cleanup();
+ unregister_pernet_subsys(&tls_proc_ops);
}
module_init(tls_register);
diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c
new file mode 100644
index 000000000000..83d9c80a684e
--- /dev/null
+++ b/net/tls/tls_proc.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/snmp.h>
+#include <net/tls.h>
+
+static const struct snmp_mib tls_mib_list[] = {
+ SNMP_MIB_ITEM("TlsCurrTxSw", LINUX_MIB_TLSCURRTXSW),
+ SNMP_MIB_ITEM("TlsCurrRxSw", LINUX_MIB_TLSCURRRXSW),
+ SNMP_MIB_ITEM("TlsCurrTxDevice", LINUX_MIB_TLSCURRTXDEVICE),
+ SNMP_MIB_ITEM("TlsCurrRxDevice", LINUX_MIB_TLSCURRRXDEVICE),
+ SNMP_MIB_ITEM("TlsTxSw", LINUX_MIB_TLSTXSW),
+ SNMP_MIB_ITEM("TlsRxSw", LINUX_MIB_TLSRXSW),
+ SNMP_MIB_ITEM("TlsTxDevice", LINUX_MIB_TLSTXDEVICE),
+ SNMP_MIB_ITEM("TlsRxDevice", LINUX_MIB_TLSRXDEVICE),
+ SNMP_MIB_ITEM("TlsDecryptError", LINUX_MIB_TLSDECRYPTERROR),
+ SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC),
+ SNMP_MIB_SENTINEL
+};
+
+static int tls_statistics_seq_show(struct seq_file *seq, void *v)
+{
+ unsigned long buf[LINUX_MIB_TLSMAX] = {};
+ struct net *net = seq->private;
+ int i;
+
+ snmp_get_cpu_field_batch(buf, tls_mib_list, net->mib.tls_statistics);
+ for (i = 0; tls_mib_list[i].name; i++)
+ seq_printf(seq, "%-32s\t%lu\n", tls_mib_list[i].name, buf[i]);
+
+ return 0;
+}
+
+int __net_init tls_proc_init(struct net *net)
+{
+ if (!proc_create_net_single("tls_stat", 0444, net->proc_net,
+ tls_statistics_seq_show, NULL))
+ return -ENOMEM;
+ return 0;
+}
+
+void __net_exit tls_proc_fini(struct net *net)
+{
+ remove_proc_entry("tls_stat", net->proc_net);
+}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 446f23c1f3ce..141da093ff04 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -168,6 +168,9 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
/* Propagate if there was an err */
if (err) {
+ if (err == -EBADMSG)
+ TLS_INC_STATS(sock_net(skb->sk),
+ LINUX_MIB_TLSDECRYPTERROR);
ctx->async_wait.err = err;
tls_err_abort(skb->sk, err);
} else {
@@ -253,6 +256,8 @@ static int tls_do_decryption(struct sock *sk,
return ret;
ret = crypto_wait_req(ret, &ctx->async_wait);
+ } else if (ret == -EBADMSG) {
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
}
if (async)
@@ -1481,7 +1486,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
if (!ctx->decrypted) {
if (tls_ctx->rx_conf == TLS_HW) {
- err = tls_device_decrypted(sk, skb);
+ err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
if (err < 0)
return err;
}
@@ -1509,7 +1514,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
rxm->offset += prot->prepend_size;
rxm->full_len -= prot->overhead_size;
tls_advance_record_sn(sk, prot, &tls_ctx->rx);
- ctx->decrypted = true;
+ ctx->decrypted = 1;
ctx->saved_data_ready(sk);
} else {
*zc = false;
@@ -1919,7 +1924,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
tls_err_abort(sk, EBADMSG);
goto splice_read_end;
}
- ctx->decrypted = true;
+ ctx->decrypted = 1;
}
rxm = strp_msg(skb);
@@ -2020,7 +2025,7 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb)
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- ctx->decrypted = false;
+ ctx->decrypted = 0;
ctx->recv_pkt = skb;
strp_pause(strp);
@@ -2376,10 +2381,11 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);
if (crypto_info->version == TLS_1_3_VERSION)
- sw_ctx_rx->async_capable = false;
+ sw_ctx_rx->async_capable = 0;
else
sw_ctx_rx->async_capable =
- tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+ !!(tfm->__crt_alg->cra_flags &
+ CRYPTO_ALG_ASYNC);
/* Set up strparser */
memset(&cb, 0, sizeof(cb));
diff --git a/net/tls/tls_toe.c b/net/tls/tls_toe.c
new file mode 100644
index 000000000000..7e1330f19165
--- /dev/null
+++ b/net/tls/tls_toe.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <net/inet_connection_sock.h>
+#include <net/tls.h>
+#include <net/tls_toe.h>
+
+static LIST_HEAD(device_list);
+static DEFINE_SPINLOCK(device_spinlock);
+
+static void tls_toe_sk_destruct(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tls_context *ctx = tls_get_ctx(sk);
+
+ ctx->sk_destruct(sk);
+ /* Free ctx */
+ rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+ tls_ctx_free(sk, ctx);
+}
+
+int tls_toe_bypass(struct sock *sk)
+{
+ struct tls_toe_device *dev;
+ struct tls_context *ctx;
+ int rc = 0;
+
+ spin_lock_bh(&device_spinlock);
+ list_for_each_entry(dev, &device_list, dev_list) {
+ if (dev->feature && dev->feature(dev)) {
+ ctx = tls_ctx_create(sk);
+ if (!ctx)
+ goto out;
+
+ ctx->sk_destruct = sk->sk_destruct;
+ sk->sk_destruct = tls_toe_sk_destruct;
+ ctx->rx_conf = TLS_HW_RECORD;
+ ctx->tx_conf = TLS_HW_RECORD;
+ update_sk_prot(sk, ctx);
+ rc = 1;
+ break;
+ }
+ }
+out:
+ spin_unlock_bh(&device_spinlock);
+ return rc;
+}
+
+void tls_toe_unhash(struct sock *sk)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ struct tls_toe_device *dev;
+
+ spin_lock_bh(&device_spinlock);
+ list_for_each_entry(dev, &device_list, dev_list) {
+ if (dev->unhash) {
+ kref_get(&dev->kref);
+ spin_unlock_bh(&device_spinlock);
+ dev->unhash(dev, sk);
+ kref_put(&dev->kref, dev->release);
+ spin_lock_bh(&device_spinlock);
+ }
+ }
+ spin_unlock_bh(&device_spinlock);
+ ctx->sk_proto->unhash(sk);
+}
+
+int tls_toe_hash(struct sock *sk)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ struct tls_toe_device *dev;
+ int err;
+
+ err = ctx->sk_proto->hash(sk);
+ spin_lock_bh(&device_spinlock);
+ list_for_each_entry(dev, &device_list, dev_list) {
+ if (dev->hash) {
+ kref_get(&dev->kref);
+ spin_unlock_bh(&device_spinlock);
+ err |= dev->hash(dev, sk);
+ kref_put(&dev->kref, dev->release);
+ spin_lock_bh(&device_spinlock);
+ }
+ }
+ spin_unlock_bh(&device_spinlock);
+
+ if (err)
+ tls_toe_unhash(sk);
+ return err;
+}
+
+void tls_toe_register_device(struct tls_toe_device *device)
+{
+ spin_lock_bh(&device_spinlock);
+ list_add_tail(&device->dev_list, &device_list);
+ spin_unlock_bh(&device_spinlock);
+}
+EXPORT_SYMBOL(tls_toe_register_device);
+
+void tls_toe_unregister_device(struct tls_toe_device *device)
+{
+ spin_lock_bh(&device_spinlock);
+ list_del(&device->dev_list);
+ spin_unlock_bh(&device_spinlock);
+}
+EXPORT_SYMBOL(tls_toe_unregister_device);
diff --git a/net/tls/trace.c b/net/tls/trace.c
new file mode 100644
index 000000000000..e374913cf9c9
--- /dev/null
+++ b/net/tls/trace.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <linux/module.h>
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+#endif
diff --git a/net/tls/trace.h b/net/tls/trace.h
new file mode 100644
index 000000000000..9ba5f600ea43
--- /dev/null
+++ b/net/tls/trace.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tls
+
+#if !defined(_TLS_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _TLS_TRACE_H_
+
+#include <asm/unaligned.h>
+#include <linux/tracepoint.h>
+
+struct sock;
+
+TRACE_EVENT(tls_device_offload_set,
+
+ TP_PROTO(struct sock *sk, int dir, u32 tcp_seq, u8 *rec_no, int ret),
+
+ TP_ARGS(sk, dir, tcp_seq, rec_no, ret),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u64, rec_no )
+ __field( int, dir )
+ __field( u32, tcp_seq )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->rec_no = get_unaligned_be64(rec_no);
+ __entry->dir = dir;
+ __entry->tcp_seq = tcp_seq;
+ __entry->ret = ret;
+ ),
+
+ TP_printk(
+ "sk=%p direction=%d tcp_seq=%u rec_no=%llu ret=%d",
+ __entry->sk, __entry->dir, __entry->tcp_seq, __entry->rec_no,
+ __entry->ret
+ )
+);
+
+TRACE_EVENT(tls_device_decrypted,
+
+ TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no, u32 rec_len,
+ bool encrypted, bool decrypted),
+
+ TP_ARGS(sk, tcp_seq, rec_no, rec_len, encrypted, decrypted),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u64, rec_no )
+ __field( u32, tcp_seq )
+ __field( u32, rec_len )
+ __field( bool, encrypted )
+ __field( bool, decrypted )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->rec_no = get_unaligned_be64(rec_no);
+ __entry->tcp_seq = tcp_seq;
+ __entry->rec_len = rec_len;
+ __entry->encrypted = encrypted;
+ __entry->decrypted = decrypted;
+ ),
+
+ TP_printk(
+ "sk=%p tcp_seq=%u rec_no=%llu len=%u encrypted=%d decrypted=%d",
+ __entry->sk, __entry->tcp_seq,
+ __entry->rec_no, __entry->rec_len,
+ __entry->encrypted, __entry->decrypted
+ )
+);
+
+TRACE_EVENT(tls_device_rx_resync_send,
+
+ TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no, int sync_type),
+
+ TP_ARGS(sk, tcp_seq, rec_no, sync_type),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u64, rec_no )
+ __field( u32, tcp_seq )
+ __field( int, sync_type )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->rec_no = get_unaligned_be64(rec_no);
+ __entry->tcp_seq = tcp_seq;
+ __entry->sync_type = sync_type;
+ ),
+
+ TP_printk(
+ "sk=%p tcp_seq=%u rec_no=%llu sync_type=%d",
+ __entry->sk, __entry->tcp_seq, __entry->rec_no,
+ __entry->sync_type
+ )
+);
+
+TRACE_EVENT(tls_device_rx_resync_nh_schedule,
+
+ TP_PROTO(struct sock *sk),
+
+ TP_ARGS(sk),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ ),
+
+ TP_printk(
+ "sk=%p", __entry->sk
+ )
+);
+
+TRACE_EVENT(tls_device_rx_resync_nh_delay,
+
+ TP_PROTO(struct sock *sk, u32 sock_data, u32 rec_len),
+
+ TP_ARGS(sk, sock_data, rec_len),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u32, sock_data )
+ __field( u32, rec_len )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->sock_data = sock_data;
+ __entry->rec_len = rec_len;
+ ),
+
+ TP_printk(
+ "sk=%p sock_data=%u rec_len=%u",
+ __entry->sk, __entry->sock_data, __entry->rec_len
+ )
+);
+
+TRACE_EVENT(tls_device_tx_resync_req,
+
+ TP_PROTO(struct sock *sk, u32 tcp_seq, u32 exp_tcp_seq),
+
+ TP_ARGS(sk, tcp_seq, exp_tcp_seq),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u32, tcp_seq )
+ __field( u32, exp_tcp_seq )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->tcp_seq = tcp_seq;
+ __entry->exp_tcp_seq = exp_tcp_seq;
+ ),
+
+ TP_printk(
+ "sk=%p tcp_seq=%u exp_tcp_seq=%u",
+ __entry->sk, __entry->tcp_seq, __entry->exp_tcp_seq
+ )
+);
+
+TRACE_EVENT(tls_device_tx_resync_send,
+
+ TP_PROTO(struct sock *sk, u32 tcp_seq, u8 *rec_no),
+
+ TP_ARGS(sk, tcp_seq, rec_no),
+
+ TP_STRUCT__entry(
+ __field( struct sock *, sk )
+ __field( u64, rec_no )
+ __field( u32, tcp_seq )
+ ),
+
+ TP_fast_assign(
+ __entry->sk = sk;
+ __entry->rec_no = get_unaligned_be64(rec_no);
+ __entry->tcp_seq = tcp_seq;
+ ),
+
+ TP_printk(
+ "sk=%p tcp_seq=%u rec_no=%llu",
+ __entry->sk, __entry->tcp_seq, __entry->rec_no
+ )
+);
+
+#endif /* _TLS_TRACE_H_ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0d8da809bea2..193cba2d777b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -284,11 +284,9 @@ static struct sock *__unix_find_socket_byname(struct net *net,
if (u->addr->len == len &&
!memcmp(u->addr->name, sunname, len))
- goto found;
+ return s;
}
- s = NULL;
-found:
- return s;
+ return NULL;
}
static inline struct sock *unix_find_socket_byname(struct net *net,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 582a3e4dfce2..1f4fde4711b6 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -439,7 +439,7 @@ static void vsock_pending_work(struct work_struct *work)
if (vsock_is_pending(sk)) {
vsock_remove_pending(listener, sk);
- listener->sk_ack_backlog--;
+ sk_acceptq_removed(listener);
} else if (!vsk->rejected) {
/* We are not on the pending list and accept() did not reject
* us, so we must have been accepted by our user process. We
@@ -641,7 +641,6 @@ EXPORT_SYMBOL_GPL(__vsock_create);
static void __vsock_release(struct sock *sk, int level)
{
if (sk) {
- struct sk_buff *skb;
struct sock *pending;
struct vsock_sock *vsk;
@@ -662,8 +661,7 @@ static void __vsock_release(struct sock *sk, int level)
sock_orphan(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
- while ((skb = skb_dequeue(&sk->sk_receive_queue)))
- kfree_skb(skb);
+ skb_queue_purge(&sk->sk_receive_queue);
/* Clean up any sockets that never were accepted. */
while ((pending = vsock_dequeue_accept(sk)) != NULL) {
@@ -1301,7 +1299,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
err = -listener->sk_err;
if (connected) {
- listener->sk_ack_backlog--;
+ sk_acceptq_removed(listener);
lock_sock_nested(connected, SINGLE_DEPTH_NESTING);
vconnected = vsock_sk(connected);
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index c443db7af8d4..7fa09c5e4625 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -13,15 +13,16 @@
#include <linux/hyperv.h>
#include <net/sock.h>
#include <net/af_vsock.h>
+#include <asm/hyperv-tlfs.h>
/* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some
- * stricter requirements on the hv_sock ring buffer size of six 4K pages. Newer
- * hosts don't have this limitation; but, keep the defaults the same for compat.
+ * stricter requirements on the hv_sock ring buffer size of six 4K pages.
+ * hyperv-tlfs defines HV_HYP_PAGE_SIZE as 4K. Newer hosts don't have this
+ * limitation; but, keep the defaults the same for compat.
*/
-#define PAGE_SIZE_4K 4096
-#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
-#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
-#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)
+#define RINGBUFFER_HVS_RCV_SIZE (HV_HYP_PAGE_SIZE * 6)
+#define RINGBUFFER_HVS_SND_SIZE (HV_HYP_PAGE_SIZE * 6)
+#define RINGBUFFER_HVS_MAX_SIZE (HV_HYP_PAGE_SIZE * 64)
/* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE (1024 * 16)
@@ -54,7 +55,8 @@ struct hvs_recv_buf {
* ringbuffer APIs that allow us to directly copy data from userspace buffer
* to VMBus ringbuffer.
*/
-#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))
+#define HVS_SEND_BUF_SIZE \
+ (HV_HYP_PAGE_SIZE - sizeof(struct vmpipe_proto_header))
struct hvs_send_buf {
/* The header before the payload data */
@@ -393,10 +395,10 @@ static void hvs_open_connection(struct vmbus_channel *chan)
} else {
sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
- sndbuf = ALIGN(sndbuf, PAGE_SIZE);
+ sndbuf = ALIGN(sndbuf, HV_HYP_PAGE_SIZE);
rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
- rcvbuf = ALIGN(rcvbuf, PAGE_SIZE);
+ rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE);
}
ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
@@ -426,7 +428,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
if (conn_from_host) {
new->sk_state = TCP_ESTABLISHED;
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
hvs_addr_init(&vnew->local_addr, if_type);
hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
@@ -670,7 +672,7 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
ssize_t ret = 0;
ssize_t bytes_written = 0;
- BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);
+ BUILD_BUG_ON(sizeof(*send_buf) != HV_HYP_PAGE_SIZE);
send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
if (!send_buf)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index fb2060dffb0a..828edd88488c 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -268,6 +268,55 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
}
static ssize_t
+virtio_transport_stream_do_peek(struct vsock_sock *vsk,
+ struct msghdr *msg,
+ size_t len)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ struct virtio_vsock_pkt *pkt;
+ size_t bytes, total = 0, off;
+ int err = -EFAULT;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ list_for_each_entry(pkt, &vvs->rx_queue, list) {
+ off = pkt->off;
+
+ if (total == len)
+ break;
+
+ while (total < len && off < pkt->len) {
+ bytes = len - total;
+ if (bytes > pkt->len - off)
+ bytes = pkt->len - off;
+
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ spin_unlock_bh(&vvs->rx_lock);
+
+ err = memcpy_to_msg(msg, pkt->buf + off, bytes);
+ if (err)
+ goto out;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ total += bytes;
+ off += bytes;
+ }
+ }
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ return total;
+
+out:
+ if (total)
+ err = total;
+ return err;
+}
+
+static ssize_t
virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
size_t len)
@@ -339,9 +388,9 @@ virtio_transport_stream_dequeue(struct vsock_sock *vsk,
size_t len, int flags)
{
if (flags & MSG_PEEK)
- return -EOPNOTSUPP;
-
- return virtio_transport_stream_do_dequeue(vsk, msg, len);
+ return virtio_transport_stream_do_peek(vsk, msg, len);
+ else
+ return virtio_transport_stream_do_dequeue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
@@ -1019,7 +1068,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
return -ENOMEM;
}
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
lock_sock_nested(child, SINGLE_DEPTH_NESTING);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 8c9c4ed90fa7..6ba98a1efe2e 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1098,7 +1098,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
}
vsock_add_pending(sk, pending);
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
pending->sk_state = TCP_SYN_SENT;
vmci_trans(vpending)->produce_size =
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7b72286922f7..da5262b2298b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -624,6 +624,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
.len = SAE_PASSWORD_MAX_LEN },
[NL80211_ATTR_TWT_RESPONDER] = { .type = NLA_FLAG },
[NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy),
+ [NL80211_ATTR_VLAN_ID] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2),
};
/* policy for the key attributes */
@@ -3940,6 +3941,10 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
key.type != NL80211_KEYTYPE_GROUP)
return -EINVAL;
+ if (key.type == NL80211_KEYTYPE_GROUP &&
+ info->attrs[NL80211_ATTR_VLAN_ID])
+ key.p.vlan_id = nla_get_u16(info->attrs[NL80211_ATTR_VLAN_ID]);
+
if (!rdev->ops->add_key)
return -EOPNOTSUPP;
@@ -5711,6 +5716,9 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_STA_AID])
params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]);
+ if (info->attrs[NL80211_ATTR_VLAN_ID])
+ params.vlan_id = nla_get_u16(info->attrs[NL80211_ATTR_VLAN_ID]);
+
if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
params.listen_interval =
nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
@@ -5856,6 +5864,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
params.listen_interval =
nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
+ if (info->attrs[NL80211_ATTR_VLAN_ID])
+ params.vlan_id = nla_get_u16(info->attrs[NL80211_ATTR_VLAN_ID]);
+
if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) {
params.support_p2p_ps =
nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]);
@@ -8265,10 +8276,8 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
/* leave request id zero for legacy request
* or if driver does not support multi-scheduled scan
*/
- if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) {
- while (!sched_scan_req->reqid)
- sched_scan_req->reqid = cfg80211_assign_cookie(rdev);
- }
+ if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1)
+ sched_scan_req->reqid = cfg80211_assign_cookie(rdev);
err = rdev_sched_scan_start(rdev, dev, sched_scan_req);
if (err)
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index dc8f689bd469..f9e83031a40a 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -114,7 +114,7 @@ void regulatory_hint_country_ie(struct wiphy *wiphy,
u8 country_ie_len);
/**
- * regulatory_hint_disconnect - informs all devices have been disconneted
+ * regulatory_hint_disconnect - informs all devices have been disconnected
*
* Regulotory rules can be enhanced further upon scanning and upon
* connection to an AP. These rules become stale if we disconnect
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 6aee9f5e8e71..c34f7d077604 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -891,7 +891,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
/* Now attach up the new socket */
skb->sk = NULL;
kfree_skb(skb);
- sk->sk_ack_backlog--;
+ sk_acceptq_removed(sk);
newsock->state = SS_CONNECTED;
rc = 0;
out2:
@@ -1062,7 +1062,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len);
makex25->calluserdata.cudlength = skb->len;
- sk->sk_ack_backlog++;
+ sk_acceptq_added(sk);
x25_insert_socket(make);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9044073fbf22..6040bc2b0088 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -196,7 +196,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
return false;
}
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
u32 len;
@@ -212,7 +212,7 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
}
-void xsk_flush(struct xdp_sock *xs)
+static void xsk_flush(struct xdp_sock *xs)
{
xskq_produce_flush_desc(xs->rx);
xs->sk.sk_data_ready(&xs->sk);
@@ -264,6 +264,35 @@ out_unlock:
return err;
}
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+ struct xdp_sock *xs)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+ struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+ int err;
+
+ err = xsk_rcv(xs, xdp);
+ if (err)
+ return err;
+
+ if (!xs->flush_node.prev)
+ list_add(&xs->flush_node, flush_list);
+
+ return 0;
+}
+
+void __xsk_map_flush(struct bpf_map *map)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+ struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+ struct xdp_sock *xs, *tmp;
+
+ list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
+ xsk_flush(xs);
+ __list_del_clearprev(&xs->flush_node);
+ }
+}
+
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
{
xskq_produce_flush_addr_n(umem->cq, nb_entries);