aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/batman-adv/Kconfig11
-rw-r--r--net/batman-adv/Makefile3
-rw-r--r--net/batman-adv/bat_iv_ogm.c330
-rw-r--r--net/batman-adv/debugfs.c37
-rw-r--r--net/batman-adv/debugfs.h6
-rw-r--r--net/batman-adv/hard-interface.c47
-rw-r--r--net/batman-adv/icmp_socket.c3
-rw-r--r--net/batman-adv/log.c20
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/originator.c107
-rw-r--r--net/batman-adv/originator.h4
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/trace.c22
-rw-r--r--net/batman-adv/trace.h78
-rw-r--r--net/batman-adv/types.h62
-rw-r--r--net/bluetooth/bnep/core.c7
-rw-r--r--net/bluetooth/cmtp/core.c14
-rw-r--r--net/bluetooth/hci_core.c65
-rw-r--r--net/bluetooth/hci_event.c47
-rw-r--r--net/bluetooth/hidp/core.c13
-rw-r--r--net/bluetooth/l2cap_core.c66
-rw-r--r--net/bluetooth/smp.c23
-rw-r--r--net/bpf/test_run.c20
-rw-r--r--net/bridge/br.c16
-rw-r--r--net/bridge/br_arp_nd_proxy.c15
-rw-r--r--net/bridge/br_device.c6
-rw-r--r--net/bridge/br_fdb.c20
-rw-r--r--net/bridge/br_if.c9
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/bridge/br_mdb.c36
-rw-r--r--net/bridge/br_multicast.c55
-rw-r--r--net/bridge/br_netfilter_hooks.c7
-rw-r--r--net/bridge/br_netlink.c43
-rw-r--r--net/bridge/br_private.h69
-rw-r--r--net/bridge/br_sysfs_br.c49
-rw-r--r--net/bridge/br_vlan.c84
-rw-r--r--net/caif/cfrfml.c3
-rw-r--r--net/core/dev.c13
-rw-r--r--net/core/devlink.c25
-rw-r--r--net/core/ethtool.c181
-rw-r--r--net/core/fib_rules.c36
-rw-r--r--net/core/filter.c347
-rw-r--r--net/core/flow_dissector.c150
-rw-r--r--net/core/gen_stats.c73
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/neighbour.c206
-rw-r--r--net/core/net_namespace.c6
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/rtnetlink.c375
-rw-r--r--net/core/skbuff.c58
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/xdp.c53
-rw-r--r--net/decnet/dn_dev.c2
-rw-r--r--net/dns_resolver/dns_key.c67
-rw-r--r--net/dns_resolver/dns_query.c5
-rw-r--r--net/dsa/Kconfig3
-rw-r--r--net/dsa/Makefile1
-rw-r--r--net/dsa/dsa.c49
-rw-r--r--net/dsa/dsa_priv.h4
-rw-r--r--net/dsa/legacy.c3
-rw-r--r--net/dsa/slave.c28
-rw-r--r--net/dsa/tag_gswip.c109
-rw-r--r--net/ieee802154/6lowpan/reassembly.c3
-rw-r--r--net/ipv4/ah4.c4
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/cipso_ipv4.c11
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/devinet.c141
-rw-r--r--net/ipv4/esp4.c11
-rw-r--r--net/ipv4/fib_frontend.c86
-rw-r--r--net/ipv4/fib_semantics.c37
-rw-r--r--net/ipv4/gre_demux.c7
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/ip_fragment.c27
-rw-r--r--net/ipv4/ip_gre.c15
-rw-r--r--net/ipv4/ip_input.c6
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/ip_vti.c4
-rw-r--r--net/ipv4/ipcomp.c4
-rw-r--r--net/ipv4/ipip.c5
-rw-r--r--net/ipv4/ipmr.c39
-rw-r--r--net/ipv4/metrics.c30
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c17
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c22
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c27
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c48
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/tcp.c44
-rw-r--r--net/ipv4/tcp_bbr.c7
-rw-r--r--net/ipv4/tcp_dctcp.c55
-rw-r--r--net/ipv4/tcp_dctcp.h40
-rw-r--r--net/ipv4/tcp_input.c51
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_output.c93
-rw-r--r--net/ipv4/tcp_rate.c15
-rw-r--r--net/ipv4/tcp_recovery.c5
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv6/addrconf.c254
-rw-r--r--net/ipv6/addrlabel.c34
-rw-r--r--net/ipv6/af_inet6.c7
-rw-r--r--net/ipv6/esp6.c7
-rw-r--r--net/ipv6/ip6_fib.c36
-rw-r--r--net/ipv6/ip6_gre.c26
-rw-r--r--net/ipv6/ip6_input.c3
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ip6mr.c57
-rw-r--r--net/ipv6/ipv6_sockglue.c11
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/ndisc.c4
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c5
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c10
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c19
-rw-r--r--net/ipv6/reassembly.c14
-rw-r--r--net/ipv6/route.c209
-rw-r--r--net/ipv6/sit.c6
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/ipv6/udp_offload.c2
-rw-r--r--net/iucv/af_iucv.c44
-rw-r--r--net/llc/llc_core.c4
-rw-r--r--net/mac80211/Kconfig17
-rw-r--r--net/mac80211/Makefile11
-rw-r--r--net/mac80211/cfg.c142
-rw-r--r--net/mac80211/debugfs.c4
-rw-r--r--net/mac80211/debugfs_sta.c364
-rw-r--r--net/mac80211/driver-ops.h26
-rw-r--r--net/mac80211/ibss.c4
-rw-r--r--net/mac80211/ieee80211_i.h11
-rw-r--r--net/mac80211/key.c111
-rw-r--r--net/mac80211/main.c78
-rw-r--r--net/mac80211/mesh.c5
-rw-r--r--net/mac80211/mlme.c130
-rw-r--r--net/mac80211/rate.h13
-rw-r--r--net/mac80211/rc80211_minstrel.c162
-rw-r--r--net/mac80211/rc80211_minstrel.h35
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c68
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c298
-rw-r--r--net/mac80211/rc80211_minstrel_ht.h20
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c58
-rw-r--r--net/mac80211/rx.c55
-rw-r--r--net/mac80211/spectmgmt.c5
-rw-r--r--net/mac80211/sta_info.c27
-rw-r--r--net/mac80211/status.c19
-rw-r--r--net/mac80211/trace.h23
-rw-r--r--net/mac80211/tx.c75
-rw-r--r--net/mac80211/util.c166
-rw-r--r--net/mac80211/vht.c20
-rw-r--r--net/mpls/af_mpls.c62
-rw-r--r--net/ncsi/internal.h5
-rw-r--r--net/ncsi/ncsi-cmd.c30
-rw-r--r--net/ncsi/ncsi-netlink.c1
-rw-r--r--net/ncsi/ncsi-pkt.h14
-rw-r--r--net/ncsi/ncsi-rsp.c43
-rw-r--r--net/netfilter/Kconfig7
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c105
-rw-r--r--net/netfilter/nf_conntrack_expect.c3
-rw-r--r--net/netfilter/nf_conntrack_netlink.c73
-rw-r--r--net/netfilter/nf_conntrack_proto.c117
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c155
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c28
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c44
-rw-r--r--net/netfilter/nf_conntrack_proto_icmp.c78
-rw-r--r--net/netfilter/nf_conntrack_proto_icmpv6.c80
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c253
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c251
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c236
-rw-r--r--net/netfilter/nf_conntrack_standalone.c9
-rw-r--r--net/netfilter/nf_flow_table_core.c41
-rw-r--r--net/netfilter/nf_flow_table_ip.c6
-rw-r--r--net/netfilter/nf_nat_helper.c4
-rw-r--r--net/netfilter/nf_nat_redirect.c4
-rw-r--r--net/netfilter/nf_tables_api.c120
-rw-r--r--net/netfilter/nf_tables_core.c28
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c59
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/nft_cmp.c6
-rw-r--r--net/netfilter/nft_ct.c22
-rw-r--r--net/netfilter/nft_dynset.c21
-rw-r--r--net/netfilter/nft_lookup.c20
-rw-r--r--net/netfilter/nft_meta.c116
-rw-r--r--net/netfilter/nft_objref.c20
-rw-r--r--net/netfilter/nft_reject.c6
-rw-r--r--net/netfilter/nft_rt.c11
-rw-r--r--net/netfilter/nft_set_hash.c38
-rw-r--r--net/netfilter/nft_xfrm.c293
-rw-r--r--net/netfilter/xt_CT.c2
-rw-r--r--net/netfilter/xt_IDLETIMER.c4
-rw-r--r--net/netfilter/xt_SECMARK.c2
-rw-r--r--net/netfilter/xt_cgroup.c72
-rw-r--r--net/netfilter/xt_quota.c55
-rw-r--r--net/netlink/af_netlink.c44
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/nfc/nci/uart.c6
-rw-r--r--net/openvswitch/conntrack.c8
-rw-r--r--net/openvswitch/datapath.c20
-rw-r--r--net/openvswitch/flow.c22
-rw-r--r--net/openvswitch/vport-internal_dev.c5
-rw-r--r--net/packet/af_packet.c17
-rw-r--r--net/rds/rds.h2
-rw-r--r--net/rds/recv.c19
-rw-r--r--net/rfkill/core.c4
-rw-r--r--net/rxrpc/af_rxrpc.c17
-rw-r--r--net/rxrpc/ar-internal.h4
-rw-r--r--net/rxrpc/call_accept.c2
-rw-r--r--net/rxrpc/conn_object.c7
-rw-r--r--net/rxrpc/input.c2
-rw-r--r--net/rxrpc/local_event.c2
-rw-r--r--net/rxrpc/output.c10
-rw-r--r--net/rxrpc/peer_event.c12
-rw-r--r--net/rxrpc/recvmsg.c43
-rw-r--r--net/rxrpc/skbuff.c15
-rw-r--r--net/rxrpc/utils.c23
-rw-r--r--net/sched/Kconfig11
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c80
-rw-r--r--net/sched/act_bpf.c3
-rw-r--r--net/sched/act_connmark.c14
-rw-r--r--net/sched/act_csum.c3
-rw-r--r--net/sched/act_gact.c9
-rw-r--r--net/sched/act_ife.c3
-rw-r--r--net/sched/act_ipt.c6
-rw-r--r--net/sched/act_mirred.c8
-rw-r--r--net/sched/act_nat.c18
-rw-r--r--net/sched/act_pedit.c3
-rw-r--r--net/sched/act_police.c189
-rw-r--r--net/sched/act_sample.c3
-rw-r--r--net/sched/act_simple.c3
-rw-r--r--net/sched/act_skbedit.c26
-rw-r--r--net/sched/act_skbmod.c3
-rw-r--r--net/sched/act_tunnel_key.c3
-rw-r--r--net/sched/act_vlan.c3
-rw-r--r--net/sched/cls_api.c250
-rw-r--r--net/sched/cls_flower.c7
-rw-r--r--net/sched/cls_u32.c121
-rw-r--r--net/sched/sch_api.c33
-rw-r--r--net/sched/sch_atm.c2
-rw-r--r--net/sched/sch_cake.c6
-rw-r--r--net/sched/sch_cbq.c2
-rw-r--r--net/sched/sch_cbs.c2
-rw-r--r--net/sched/sch_drr.c4
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sched/sch_fifo.c2
-rw-r--r--net/sched/sch_fq.c83
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_generic.c66
-rw-r--r--net/sched/sch_hfsc.c2
-rw-r--r--net/sched/sch_hhf.c2
-rw-r--r--net/sched/sch_htb.c116
-rw-r--r--net/sched/sch_mq.c4
-rw-r--r--net/sched/sch_mqprio.c4
-rw-r--r--net/sched/sch_multiq.c6
-rw-r--r--net/sched/sch_netem.c16
-rw-r--r--net/sched/sch_pie.c36
-rw-r--r--net/sched/sch_prio.c6
-rw-r--r--net/sched/sch_qfq.c4
-rw-r--r--net/sched/sch_red.c4
-rw-r--r--net/sched/sch_sfb.c4
-rw-r--r--net/sched/sch_taprio.c962
-rw-r--r--net/sched/sch_tbf.c6
-rw-r--r--net/sctp/ulpqueue.c2
-rw-r--r--net/socket.c2
-rw-r--r--net/tipc/bearer.c2
-rw-r--r--net/tipc/msg.c78
-rw-r--r--net/tipc/msg.h11
-rw-r--r--net/tipc/node.h12
-rw-r--r--net/tipc/socket.c207
-rw-r--r--net/tipc/topsrv.c10
-rw-r--r--net/tls/tls_main.c54
-rw-r--r--net/tls/tls_sw.c869
-rw-r--r--net/wireless/core.c83
-rw-r--r--net/wireless/core.h14
-rw-r--r--net/wireless/lib80211_crypt_tkip.c59
-rw-r--r--net/wireless/lib80211_crypt_wep.c52
-rw-r--r--net/wireless/nl80211.c840
-rw-r--r--net/wireless/rdev-ops.h15
-rw-r--r--net/wireless/reg.c121
-rw-r--r--net/wireless/trace.h235
-rw-r--r--net/wireless/util.c160
-rw-r--r--net/xdp/xdp_umem.c106
-rw-r--r--net/xdp/xdp_umem.h12
-rw-r--r--net/xdp/xdp_umem_props.h14
-rw-r--r--net/xdp/xsk.c54
-rw-r--r--net/xdp/xsk_queue.c60
-rw-r--r--net/xdp/xsk_queue.h16
-rw-r--r--net/xfrm/xfrm_device.c8
-rw-r--r--net/xfrm/xfrm_interface.c6
-rw-r--r--net/xfrm/xfrm_output.c2
-rw-r--r--net/xfrm/xfrm_user.c2
296 files changed, 9867 insertions, 5016 deletions
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 361116f77cb9..f75816f58107 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -106,3 +106,14 @@ config BATMAN_ADV_DEBUG
say N here. This enables compilation of support for
outputting debugging information to the kernel log. The
output is controlled via the module parameter debug.
+
+config BATMAN_ADV_TRACING
+ bool "B.A.T.M.A.N. tracing support"
+ depends on BATMAN_ADV
+ depends on EVENT_TRACING
+ help
+ This is an option for use by developers; most people should
+ say N here. Select this option to gather traces like the debug
+ messages using the generic tracing infrastructure of the kernel.
+ BATMAN_ADV_DEBUG must also be selected to get trace events for
+ batadv_dbg.
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index b97ba6fb8353..9b58160fe485 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -42,6 +42,9 @@ batman-adv-y += routing.o
batman-adv-y += send.o
batman-adv-y += soft-interface.o
batman-adv-y += sysfs.o
+batman-adv-$(CONFIG_BATMAN_ADV_TRACING) += trace.o
batman-adv-y += tp_meter.o
batman-adv-y += translation-table.o
batman-adv-y += tvlv.o
+
+CFLAGS_trace.o := -I$(src)
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 73bf6a93a3cf..d2227091029f 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -138,169 +138,6 @@ static u8 batadv_ring_buffer_avg(const u8 lq_recv[])
}
/**
- * batadv_iv_ogm_orig_free() - free the private resources allocated for this
- * orig_node
- * @orig_node: the orig_node for which the resources have to be free'd
- */
-static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
-{
- kfree(orig_node->bat_iv.bcast_own);
- kfree(orig_node->bat_iv.bcast_own_sum);
-}
-
-/**
- * batadv_iv_ogm_orig_add_if() - change the private structures of the orig_node
- * to include the new hard-interface
- * @orig_node: the orig_node that has to be changed
- * @max_if_num: the current amount of interfaces
- *
- * Return: 0 on success, a negative error code otherwise.
- */
-static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
- unsigned int max_if_num)
-{
- void *data_ptr;
- size_t old_size;
- int ret = -ENOMEM;
-
- spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-
- old_size = (max_if_num - 1) * sizeof(unsigned long) * BATADV_NUM_WORDS;
- data_ptr = kmalloc_array(max_if_num,
- BATADV_NUM_WORDS * sizeof(unsigned long),
- GFP_ATOMIC);
- if (!data_ptr)
- goto unlock;
-
- memcpy(data_ptr, orig_node->bat_iv.bcast_own, old_size);
- kfree(orig_node->bat_iv.bcast_own);
- orig_node->bat_iv.bcast_own = data_ptr;
-
- data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC);
- if (!data_ptr)
- goto unlock;
-
- memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
- (max_if_num - 1) * sizeof(u8));
- kfree(orig_node->bat_iv.bcast_own_sum);
- orig_node->bat_iv.bcast_own_sum = data_ptr;
-
- ret = 0;
-
-unlock:
- spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-
- return ret;
-}
-
-/**
- * batadv_iv_ogm_drop_bcast_own_entry() - drop section of bcast_own
- * @orig_node: the orig_node that has to be changed
- * @max_if_num: the current amount of interfaces
- * @del_if_num: the index of the interface being removed
- */
-static void
-batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node,
- unsigned int max_if_num,
- unsigned int del_if_num)
-{
- size_t chunk_size;
- size_t if_offset;
- void *data_ptr;
-
- lockdep_assert_held(&orig_node->bat_iv.ogm_cnt_lock);
-
- chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS;
- data_ptr = kmalloc_array(max_if_num, chunk_size, GFP_ATOMIC);
- if (!data_ptr)
- /* use old buffer when new one could not be allocated */
- data_ptr = orig_node->bat_iv.bcast_own;
-
- /* copy first part */
- memmove(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size);
-
- /* copy second part */
- if_offset = (del_if_num + 1) * chunk_size;
- memmove((char *)data_ptr + del_if_num * chunk_size,
- (uint8_t *)orig_node->bat_iv.bcast_own + if_offset,
- (max_if_num - del_if_num) * chunk_size);
-
- /* bcast_own was shrunk down in new buffer; free old one */
- if (orig_node->bat_iv.bcast_own != data_ptr) {
- kfree(orig_node->bat_iv.bcast_own);
- orig_node->bat_iv.bcast_own = data_ptr;
- }
-}
-
-/**
- * batadv_iv_ogm_drop_bcast_own_sum_entry() - drop section of bcast_own_sum
- * @orig_node: the orig_node that has to be changed
- * @max_if_num: the current amount of interfaces
- * @del_if_num: the index of the interface being removed
- */
-static void
-batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node,
- unsigned int max_if_num,
- unsigned int del_if_num)
-{
- size_t if_offset;
- void *data_ptr;
-
- lockdep_assert_held(&orig_node->bat_iv.ogm_cnt_lock);
-
- data_ptr = kmalloc_array(max_if_num, sizeof(u8), GFP_ATOMIC);
- if (!data_ptr)
- /* use old buffer when new one could not be allocated */
- data_ptr = orig_node->bat_iv.bcast_own_sum;
-
- memmove(data_ptr, orig_node->bat_iv.bcast_own_sum,
- del_if_num * sizeof(u8));
-
- if_offset = (del_if_num + 1) * sizeof(u8);
- memmove((char *)data_ptr + del_if_num * sizeof(u8),
- orig_node->bat_iv.bcast_own_sum + if_offset,
- (max_if_num - del_if_num) * sizeof(u8));
-
- /* bcast_own_sum was shrunk down in new buffer; free old one */
- if (orig_node->bat_iv.bcast_own_sum != data_ptr) {
- kfree(orig_node->bat_iv.bcast_own_sum);
- orig_node->bat_iv.bcast_own_sum = data_ptr;
- }
-}
-
-/**
- * batadv_iv_ogm_orig_del_if() - change the private structures of the orig_node
- * to exclude the removed interface
- * @orig_node: the orig_node that has to be changed
- * @max_if_num: the current amount of interfaces
- * @del_if_num: the index of the interface being removed
- *
- * Return: 0 on success, a negative error code otherwise.
- */
-static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
- unsigned int max_if_num,
- unsigned int del_if_num)
-{
- spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-
- if (max_if_num == 0) {
- kfree(orig_node->bat_iv.bcast_own);
- kfree(orig_node->bat_iv.bcast_own_sum);
- orig_node->bat_iv.bcast_own = NULL;
- orig_node->bat_iv.bcast_own_sum = NULL;
- } else {
- batadv_iv_ogm_drop_bcast_own_entry(orig_node, max_if_num,
- del_if_num);
- batadv_iv_ogm_drop_bcast_own_sum_entry(orig_node, max_if_num,
- del_if_num);
- }
-
- spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-
- return 0;
-}
-
-/**
* batadv_iv_ogm_orig_get() - retrieve or create (if does not exist) an
* originator
* @bat_priv: the bat priv with all the soft interface information
@@ -315,7 +152,6 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
{
struct batadv_orig_node *orig_node;
int hash_added;
- size_t size;
orig_node = batadv_orig_hash_find(bat_priv, addr);
if (orig_node)
@@ -327,16 +163,6 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
spin_lock_init(&orig_node->bat_iv.ogm_cnt_lock);
- size = bat_priv->num_ifaces * sizeof(unsigned long) * BATADV_NUM_WORDS;
- orig_node->bat_iv.bcast_own = kzalloc(size, GFP_ATOMIC);
- if (!orig_node->bat_iv.bcast_own)
- goto free_orig_node;
-
- size = bat_priv->num_ifaces * sizeof(u8);
- orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC);
- if (!orig_node->bat_iv.bcast_own_sum)
- goto free_orig_node;
-
kref_get(&orig_node->refcount);
hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
batadv_choose_orig, orig_node,
@@ -347,8 +173,9 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
return orig_node;
free_orig_node_hash:
+ /* reference for batadv_hash_add */
batadv_orig_node_put(orig_node);
-free_orig_node:
+ /* reference from batadv_orig_node_new */
batadv_orig_node_put(orig_node);
return NULL;
@@ -893,26 +720,30 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
struct batadv_hashtable *hash = bat_priv->orig_hash;
struct hlist_head *head;
struct batadv_orig_node *orig_node;
+ struct batadv_orig_ifinfo *orig_ifinfo;
unsigned long *word;
u32 i;
- size_t word_index;
u8 *w;
- unsigned int if_num;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
rcu_read_lock();
hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
- word_index = hard_iface->if_num * BATADV_NUM_WORDS;
- word = &orig_node->bat_iv.bcast_own[word_index];
-
- batadv_bit_get_packet(bat_priv, word, 1, 0);
- if_num = hard_iface->if_num;
- w = &orig_node->bat_iv.bcast_own_sum[if_num];
- *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE);
- spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ hlist_for_each_entry_rcu(orig_ifinfo,
+ &orig_node->ifinfo_list,
+ list) {
+ if (orig_ifinfo->if_outgoing != hard_iface)
+ continue;
+
+ spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ word = orig_ifinfo->bat_iv.bcast_own;
+ batadv_bit_get_packet(bat_priv, word, 1, 0);
+ w = &orig_ifinfo->bat_iv.bcast_own_sum;
+ *w = bitmap_weight(word,
+ BATADV_TQ_LOCAL_WINDOW_SIZE);
+ spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ }
}
rcu_read_unlock();
}
@@ -1000,6 +831,35 @@ out:
}
/**
+ * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface
+ * @orig_node: originator which rebroadcasted the OGMs directly
+ * @if_outgoing: interface which transmitted the original OGM and received the
+ * direct rebroadcast
+ *
+ * Return: Number of replied (rebroadcasted) OGMs which were transmitted by
+ * an originator and directly (without intermediate hop) received by a specific
+ * interface
+ */
+static u8 batadv_iv_orig_ifinfo_sum(struct batadv_orig_node *orig_node,
+ struct batadv_hard_iface *if_outgoing)
+{
+ struct batadv_orig_ifinfo *orig_ifinfo;
+ u8 sum;
+
+ orig_ifinfo = batadv_orig_ifinfo_get(orig_node, if_outgoing);
+ if (!orig_ifinfo)
+ return 0;
+
+ spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ sum = orig_ifinfo->bat_iv.bcast_own_sum;
+ spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+ batadv_orig_ifinfo_put(orig_ifinfo);
+
+ return sum;
+}
+
+/**
* batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an
* originator
* @bat_priv: the bat priv with all the soft interface information
@@ -1026,8 +886,6 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
struct batadv_neigh_node *neigh_node = NULL;
struct batadv_neigh_node *tmp_neigh_node = NULL;
struct batadv_neigh_node *router = NULL;
- struct batadv_orig_node *orig_node_tmp;
- unsigned int if_num;
u8 sum_orig, sum_neigh;
u8 *neigh_addr;
u8 tq_avg;
@@ -1132,18 +990,10 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
*/
if (router_ifinfo &&
neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg) {
- orig_node_tmp = router->orig_node;
- spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
- if_num = router->if_incoming->if_num;
- sum_orig = orig_node_tmp->bat_iv.bcast_own_sum[if_num];
- spin_unlock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
-
- orig_node_tmp = neigh_node->orig_node;
- spin_lock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
- if_num = neigh_node->if_incoming->if_num;
- sum_neigh = orig_node_tmp->bat_iv.bcast_own_sum[if_num];
- spin_unlock_bh(&orig_node_tmp->bat_iv.ogm_cnt_lock);
-
+ sum_orig = batadv_iv_orig_ifinfo_sum(router->orig_node,
+ router->if_incoming);
+ sum_neigh = batadv_iv_orig_ifinfo_sum(neigh_node->orig_node,
+ neigh_node->if_incoming);
if (sum_orig >= sum_neigh)
goto out;
}
@@ -1186,7 +1036,6 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
u8 total_count;
u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
- unsigned int if_num;
unsigned int tq_asym_penalty, inv_asym_penalty;
unsigned int combined_tq;
unsigned int tq_iface_penalty;
@@ -1227,9 +1076,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
orig_node->last_seen = jiffies;
/* find packet count of corresponding one hop neighbor */
- spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
- if_num = if_incoming->if_num;
- orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num];
+ orig_eq_count = batadv_iv_orig_ifinfo_sum(orig_neigh_node, if_incoming);
neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
if (neigh_ifinfo) {
neigh_rq_count = neigh_ifinfo->bat_iv.real_packet_count;
@@ -1237,7 +1084,6 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
} else {
neigh_rq_count = 0;
}
- spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
/* pay attention to not get a value bigger than 100 % */
if (orig_eq_count > neigh_rq_count)
@@ -1622,6 +1468,49 @@ out:
}
/**
+ * batadv_iv_ogm_process_reply() - Check OGM for direct reply and process it
+ * @ogm_packet: rebroadcast OGM packet to process
+ * @if_incoming: the interface where this packet was received
+ * @orig_node: originator which rebroadcasted the OGMs
+ * @if_incoming_seqno: OGM sequence number when rebroadcast was received
+ */
+static void batadv_iv_ogm_process_reply(struct batadv_ogm_packet *ogm_packet,
+ struct batadv_hard_iface *if_incoming,
+ struct batadv_orig_node *orig_node,
+ u32 if_incoming_seqno)
+{
+ struct batadv_orig_ifinfo *orig_ifinfo;
+ s32 bit_pos;
+ u8 *weight;
+
+ /* neighbor has to indicate direct link and it has to
+ * come via the corresponding interface
+ */
+ if (!(ogm_packet->flags & BATADV_DIRECTLINK))
+ return;
+
+ if (!batadv_compare_eth(if_incoming->net_dev->dev_addr,
+ ogm_packet->orig))
+ return;
+
+ orig_ifinfo = batadv_orig_ifinfo_get(orig_node, if_incoming);
+ if (!orig_ifinfo)
+ return;
+
+ /* save packet seqno for bidirectional check */
+ spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+ bit_pos = if_incoming_seqno - 2;
+ bit_pos -= ntohl(ogm_packet->seqno);
+ batadv_set_bit(orig_ifinfo->bat_iv.bcast_own, bit_pos);
+ weight = &orig_ifinfo->bat_iv.bcast_own_sum;
+ *weight = bitmap_weight(orig_ifinfo->bat_iv.bcast_own,
+ BATADV_TQ_LOCAL_WINDOW_SIZE);
+ spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+ batadv_orig_ifinfo_put(orig_ifinfo);
+}
+
+/**
* batadv_iv_ogm_process() - process an incoming batman iv OGM
* @skb: the skb containing the OGM
* @ogm_offset: offset to the OGM which should be processed (for aggregates)
@@ -1705,37 +1594,13 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
}
if (is_my_orig) {
- unsigned long *word;
- size_t offset;
- s32 bit_pos;
- unsigned int if_num;
- u8 *weight;
-
orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv,
ethhdr->h_source);
if (!orig_neigh_node)
return;
- /* neighbor has to indicate direct link and it has to
- * come via the corresponding interface
- * save packet seqno for bidirectional check
- */
- if (has_directlink_flag &&
- batadv_compare_eth(if_incoming->net_dev->dev_addr,
- ogm_packet->orig)) {
- if_num = if_incoming->if_num;
- offset = if_num * BATADV_NUM_WORDS;
-
- spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
- word = &orig_neigh_node->bat_iv.bcast_own[offset];
- bit_pos = if_incoming_seqno - 2;
- bit_pos -= ntohl(ogm_packet->seqno);
- batadv_set_bit(word, bit_pos);
- weight = &orig_neigh_node->bat_iv.bcast_own_sum[if_num];
- *weight = bitmap_weight(word,
- BATADV_TQ_LOCAL_WINDOW_SIZE);
- spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock);
- }
+ batadv_iv_ogm_process_reply(ogm_packet, if_incoming,
+ orig_neigh_node, if_incoming_seqno);
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: originator packet from myself (via neighbor)\n");
@@ -2844,9 +2709,6 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
.print = batadv_iv_ogm_orig_print,
#endif
.dump = batadv_iv_ogm_orig_dump,
- .free = batadv_iv_ogm_orig_free,
- .add_if = batadv_iv_ogm_orig_add_if,
- .del_if = batadv_iv_ogm_orig_del_if,
},
.gw = {
.init_sel_class = batadv_iv_init_sel_class,
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 3cb82378300b..8b608a2e2653 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -47,8 +47,24 @@
static struct dentry *batadv_debugfs;
+/**
+ * batadv_debugfs_deprecated() - Log use of deprecated batadv debugfs access
+ * @file: file which was accessed
+ * @alt: explanation what can be used as alternative
+ */
+void batadv_debugfs_deprecated(struct file *file, const char *alt)
+{
+ struct dentry *dentry = file_dentry(file);
+ const char *name = dentry->d_name.name;
+
+ pr_warn_ratelimited(DEPRECATED "%s (pid %d) Use of debugfs file \"%s\".\n%s",
+ current->comm, task_pid_nr(current), name, alt);
+}
+
static int batadv_algorithms_open(struct inode *inode, struct file *file)
{
+ batadv_debugfs_deprecated(file,
+ "Use genl command BATADV_CMD_GET_ROUTING_ALGOS instead\n");
return single_open(file, batadv_algo_seq_print_text, NULL);
}
@@ -56,6 +72,8 @@ static int neighbors_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+ batadv_debugfs_deprecated(file,
+ "Use genl command BATADV_CMD_GET_NEIGHBORS instead\n");
return single_open(file, batadv_hardif_neigh_seq_print_text, net_dev);
}
@@ -63,6 +81,8 @@ static int batadv_originators_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+ batadv_debugfs_deprecated(file,
+ "Use genl command BATADV_CMD_GET_ORIGINATORS instead\n");
return single_open(file, batadv_orig_seq_print_text, net_dev);
}
@@ -79,6 +99,8 @@ static int batadv_originators_hardif_open(struct inode *inode,
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+ batadv_debugfs_deprecated(file,
+ "Use genl command BATADV_CMD_GET_HARDIFS instead\n");
return single_open(file, batadv_orig_hardif_seq_print_text, net_dev);
}
@@ -86,6 +108,8 @@ static int batadv_gateways_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+ batadv_debugfs_deprecated(file,
+ "Use genl command BATADV_CMD_GET_GATEWAYS instead\n");
return single_open(file, batadv_gw_client_seq_print_text, net_dev);
}
@@ -93,6 +117,8 @@ static int batadv_transtable_global_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+ batadv_debugfs_deprecated(file,
+ "Use genl command BATADV_CMD_GET_TRANSTABLE_GLOBAL instead\n");
return single_open(file, batadv_tt_global_seq_print_text, net_dev);
}
@@ -101,6 +127,8 @@ static int batadv_bla_claim_table_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+ batadv_debugfs_deprecated(file,
+ "Use genl command BATADV_CMD_GET_BLA_CLAIM instead\n");
return single_open(file, batadv_bla_claim_table_seq_print_text,
net_dev);
}
@@ -110,6 +138,8 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+ batadv_debugfs_deprecated(file,
+ "Use genl command BATADV_CMD_GET_BLA_BACKBONE instead\n");
return single_open(file, batadv_bla_backbone_table_seq_print_text,
net_dev);
}
@@ -128,6 +158,8 @@ static int batadv_dat_cache_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+ batadv_debugfs_deprecated(file,
+ "Use genl command BATADV_CMD_GET_DAT_CACHE instead\n");
return single_open(file, batadv_dat_cache_seq_print_text, net_dev);
}
#endif
@@ -136,6 +168,8 @@ static int batadv_transtable_local_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+ batadv_debugfs_deprecated(file,
+ "Use genl command BATADV_CMD_GET_TRANSTABLE_LOCAL instead\n");
return single_open(file, batadv_tt_local_seq_print_text, net_dev);
}
@@ -149,6 +183,7 @@ static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+ batadv_debugfs_deprecated(file, "");
return single_open(file, batadv_nc_nodes_seq_print_text, net_dev);
}
#endif
@@ -165,6 +200,8 @@ static int batadv_mcast_flags_open(struct inode *inode, struct file *file)
{
struct net_device *net_dev = (struct net_device *)inode->i_private;
+ batadv_debugfs_deprecated(file,
+ "Use genl command BATADV_CMD_GET_MCAST_FLAGS instead\n");
return single_open(file, batadv_mcast_flags_seq_print_text, net_dev);
}
#endif
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 08a592ffbee5..8de018e5c577 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -21,12 +21,14 @@
#include "main.h"
+struct file;
struct net_device;
#define BATADV_DEBUGFS_SUBDIR "batman_adv"
#if IS_ENABLED(CONFIG_BATMAN_ADV_DEBUGFS)
+void batadv_debugfs_deprecated(struct file *file, const char *alt);
void batadv_debugfs_init(void);
void batadv_debugfs_destroy(void);
int batadv_debugfs_add_meshif(struct net_device *dev);
@@ -38,6 +40,10 @@ void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface);
#else
+static inline void batadv_debugfs_deprecated(struct file *file, const char *alt)
+{
+}
+
static inline void batadv_debugfs_init(void)
{
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 2f0d42f2f913..781c5b6e6e8e 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -763,11 +763,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->soft_iface = soft_iface;
bat_priv = netdev_priv(hard_iface->soft_iface);
- if (bat_priv->num_ifaces >= UINT_MAX) {
- ret = -ENOSPC;
- goto err_dev;
- }
-
ret = netdev_master_upper_dev_link(hard_iface->net_dev,
soft_iface, NULL, NULL, NULL);
if (ret)
@@ -777,16 +772,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
if (ret < 0)
goto err_upper;
- hard_iface->if_num = bat_priv->num_ifaces;
- bat_priv->num_ifaces++;
hard_iface->if_status = BATADV_IF_INACTIVE;
- ret = batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces);
- if (ret < 0) {
- bat_priv->algo_ops->iface.disable(hard_iface);
- bat_priv->num_ifaces--;
- hard_iface->if_status = BATADV_IF_NOT_IN_USE;
- goto err_upper;
- }
kref_get(&hard_iface->refcount);
hard_iface->batman_adv_ptype.type = ethertype;
@@ -834,6 +820,33 @@ err:
}
/**
+ * batadv_hardif_cnt() - get number of interfaces enslaved to soft interface
+ * @soft_iface: soft interface to check
+ *
+ * This function is only using RCU for locking - the result can therefore be
+ * off when another function is modifying the list at the same time. The
+ * caller can use the rtnl_lock to make sure that the count is accurate.
+ *
+ * Return: number of connected/enslaved hard interfaces
+ */
+static size_t batadv_hardif_cnt(const struct net_device *soft_iface)
+{
+ struct batadv_hard_iface *hard_iface;
+ size_t count = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ if (hard_iface->soft_iface != soft_iface)
+ continue;
+
+ count++;
+ }
+ rcu_read_unlock();
+
+ return count;
+}
+
+/**
* batadv_hardif_disable_interface() - Remove hard interface from soft interface
* @hard_iface: hard interface to be removed
* @autodel: whether to delete soft interface when it doesn't contain any other
@@ -855,9 +868,6 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
dev_remove_pack(&hard_iface->batman_adv_ptype);
batadv_hardif_put(hard_iface);
- bat_priv->num_ifaces--;
- batadv_orig_hash_del_if(hard_iface, bat_priv->num_ifaces);
-
primary_if = batadv_primary_if_get_selected(bat_priv);
if (hard_iface == primary_if) {
struct batadv_hard_iface *new_if;
@@ -881,7 +891,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface);
/* nobody uses this interface anymore */
- if (bat_priv->num_ifaces == 0) {
+ if (batadv_hardif_cnt(hard_iface->soft_iface) <= 1) {
batadv_gw_check_client_stop(bat_priv);
if (autodel == BATADV_IF_CLEANUP_AUTO)
@@ -917,7 +927,6 @@ batadv_hardif_add_interface(struct net_device *net_dev)
if (ret)
goto free_if;
- hard_iface->if_num = 0;
hard_iface->net_dev = net_dev;
hard_iface->soft_iface = NULL;
hard_iface->if_status = BATADV_IF_NOT_IN_USE;
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 55c358ad3331..d70f363c52ae 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -47,6 +47,7 @@
#include <linux/wait.h>
#include <uapi/linux/batadv_packet.h>
+#include "debugfs.h"
#include "hard-interface.h"
#include "log.h"
#include "originator.h"
@@ -74,6 +75,8 @@ static int batadv_socket_open(struct inode *inode, struct file *file)
if (!try_module_get(THIS_MODULE))
return -EBUSY;
+ batadv_debugfs_deprecated(file, "");
+
nonseekable_open(inode, file);
socket_client = kmalloc(sizeof(*socket_client), GFP_KERNEL);
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 853773e45f79..6beb5f067810 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -40,6 +40,9 @@
#include <linux/wait.h>
#include <stdarg.h>
+#include "debugfs.h"
+#include "trace.h"
+
#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1)
static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN;
@@ -98,13 +101,19 @@ static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
*/
int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
- char tmp_log_buf[256];
va_start(args, fmt);
- vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
- batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s",
- jiffies_to_msecs(jiffies), tmp_log_buf);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ batadv_fdebug_log(bat_priv->debug_log, "[%10u] %pV",
+ jiffies_to_msecs(jiffies), &vaf);
+
+ trace_batadv_dbg(bat_priv, &vaf);
+
va_end(args);
return 0;
@@ -115,6 +124,9 @@ static int batadv_log_open(struct inode *inode, struct file *file)
if (!try_module_get(THIS_MODULE))
return -EBUSY;
+ batadv_debugfs_deprecated(file,
+ "Use tracepoint batadv:batadv_dbg instead\n");
+
nonseekable_open(inode, file);
file->private_data = inode->i_private;
return 0;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 3ccc75ee719c..2002b70e18db 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -25,7 +25,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2018.3"
+#define BATADV_SOURCE_VERSION "2018.4"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 1d295da3e342..56a981af5c92 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -904,9 +904,6 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
batadv_frag_purge_orig(orig_node, NULL);
- if (orig_node->bat_priv->algo_ops->orig.free)
- orig_node->bat_priv->algo_ops->orig.free(orig_node);
-
kfree(orig_node->tt_buff);
kfree(orig_node);
}
@@ -1555,107 +1552,3 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
return ret;
}
-
-/**
- * batadv_orig_hash_add_if() - Add interface to originators in orig_hash
- * @hard_iface: hard interface to add (already slave of the soft interface)
- * @max_if_num: new number of interfaces
- *
- * Return: 0 on success or negative error number in case of failure
- */
-int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
- unsigned int max_if_num)
-{
- struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- struct batadv_algo_ops *bao = bat_priv->algo_ops;
- struct batadv_hashtable *hash = bat_priv->orig_hash;
- struct hlist_head *head;
- struct batadv_orig_node *orig_node;
- u32 i;
- int ret;
-
- /* resize all orig nodes because orig_node->bcast_own(_sum) depend on
- * if_num
- */
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- ret = 0;
- if (bao->orig.add_if)
- ret = bao->orig.add_if(orig_node, max_if_num);
- if (ret == -ENOMEM)
- goto err;
- }
- rcu_read_unlock();
- }
-
- return 0;
-
-err:
- rcu_read_unlock();
- return -ENOMEM;
-}
-
-/**
- * batadv_orig_hash_del_if() - Remove interface from originators in orig_hash
- * @hard_iface: hard interface to remove (still slave of the soft interface)
- * @max_if_num: new number of interfaces
- *
- * Return: 0 on success or negative error number in case of failure
- */
-int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
- unsigned int max_if_num)
-{
- struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- struct batadv_hashtable *hash = bat_priv->orig_hash;
- struct hlist_head *head;
- struct batadv_hard_iface *hard_iface_tmp;
- struct batadv_orig_node *orig_node;
- struct batadv_algo_ops *bao = bat_priv->algo_ops;
- u32 i;
- int ret;
-
- /* resize all orig nodes because orig_node->bcast_own(_sum) depend on
- * if_num
- */
- for (i = 0; i < hash->size; i++) {
- head = &hash->table[i];
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
- ret = 0;
- if (bao->orig.del_if)
- ret = bao->orig.del_if(orig_node, max_if_num,
- hard_iface->if_num);
- if (ret == -ENOMEM)
- goto err;
- }
- rcu_read_unlock();
- }
-
- /* renumber remaining batman interfaces _inside_ of orig_hash_lock */
- rcu_read_lock();
- list_for_each_entry_rcu(hard_iface_tmp, &batadv_hardif_list, list) {
- if (hard_iface_tmp->if_status == BATADV_IF_NOT_IN_USE)
- continue;
-
- if (hard_iface == hard_iface_tmp)
- continue;
-
- if (hard_iface->soft_iface != hard_iface_tmp->soft_iface)
- continue;
-
- if (hard_iface_tmp->if_num > hard_iface->if_num)
- hard_iface_tmp->if_num--;
- }
- rcu_read_unlock();
-
- hard_iface->if_num = -1;
- return 0;
-
-err:
- rcu_read_unlock();
- return -ENOMEM;
-}
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 3b3f59b881e1..a8b4c7b667ec 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -72,10 +72,6 @@ void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
-int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
- unsigned int max_if_num);
-int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
- unsigned int max_if_num);
struct batadv_orig_node_vlan *
batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
unsigned short vid);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 626ddca332db..5db5a0a4c959 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -844,7 +844,6 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->frag_seqno, random_seqno);
bat_priv->primary_if = NULL;
- bat_priv->num_ifaces = 0;
batadv_nc_init_bat_priv(bat_priv);
@@ -1062,6 +1061,7 @@ static void batadv_softif_init_early(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = batadv_softif_free;
dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
+ dev->features |= NETIF_F_LLTX;
dev->priv_flags |= IFF_NO_QUEUE;
/* can't call min_mtu, because the needed variables
diff --git a/net/batman-adv/trace.c b/net/batman-adv/trace.c
new file mode 100644
index 000000000000..3d57f9981f25
--- /dev/null
+++ b/net/batman-adv/trace.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
+ *
+ * Sven Eckelmann
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/module.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
new file mode 100644
index 000000000000..3acda26a30ca
--- /dev/null
+++ b/net/batman-adv/trace.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors:
+ *
+ * Sven Eckelmann
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if !defined(_NET_BATMAN_ADV_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _NET_BATMAN_ADV_TRACE_H_
+
+#include "main.h"
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM batadv
+
+/* provide dummy function when tracing is disabled */
+#if !defined(CONFIG_BATMAN_ADV_TRACING)
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, ...) \
+ static inline void trace_ ## name(proto) {}
+
+#endif /* CONFIG_BATMAN_ADV_TRACING */
+
+#define BATADV_MAX_MSG_LEN 256
+
+TRACE_EVENT(batadv_dbg,
+
+ TP_PROTO(struct batadv_priv *bat_priv,
+ struct va_format *vaf),
+
+ TP_ARGS(bat_priv, vaf),
+
+ TP_STRUCT__entry(
+ __string(device, bat_priv->soft_iface->name)
+ __string(driver, KBUILD_MODNAME)
+ __dynamic_array(char, msg, BATADV_MAX_MSG_LEN)
+ ),
+
+ TP_fast_assign(
+ __assign_str(device, bat_priv->soft_iface->name);
+ __assign_str(driver, KBUILD_MODNAME);
+ WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
+ BATADV_MAX_MSG_LEN,
+ vaf->fmt,
+ *vaf->va) >= BATADV_MAX_MSG_LEN);
+ ),
+
+ TP_printk(
+ "%s %s %s",
+ __get_str(driver),
+ __get_str(device),
+ __get_str(msg)
+ )
+);
+
+#endif /* _NET_BATMAN_ADV_TRACE_H_ || TRACE_HEADER_MULTI_READ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 343d304851a5..45b5592de816 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -167,9 +167,6 @@ struct batadv_hard_iface {
/** @list: list node for batadv_hardif_list */
struct list_head list;
- /** @if_num: identificator of the interface */
- unsigned int if_num;
-
/** @if_status: status of the interface for batman-adv */
char if_status;
@@ -233,6 +230,20 @@ struct batadv_hard_iface {
};
/**
+ * struct batadv_orig_ifinfo_bat_iv - B.A.T.M.A.N. IV private orig_ifinfo members
+ */
+struct batadv_orig_ifinfo_bat_iv {
+ /**
+ * @bcast_own: bitfield which counts the number of our OGMs this
+ * orig_node rebroadcasted "back" to us (relative to last_real_seqno)
+ */
+ DECLARE_BITMAP(bcast_own, BATADV_TQ_LOCAL_WINDOW_SIZE);
+
+ /** @bcast_own_sum: sum of bcast_own */
+ u8 bcast_own_sum;
+};
+
+/**
* struct batadv_orig_ifinfo - originator info per outgoing interface
*/
struct batadv_orig_ifinfo {
@@ -257,6 +268,9 @@ struct batadv_orig_ifinfo {
/** @batman_seqno_reset: time when the batman seqno window was reset */
unsigned long batman_seqno_reset;
+ /** @bat_iv: B.A.T.M.A.N. IV private structure */
+ struct batadv_orig_ifinfo_bat_iv bat_iv;
+
/** @refcount: number of contexts the object is used */
struct kref refcount;
@@ -339,19 +353,10 @@ struct batadv_orig_node_vlan {
*/
struct batadv_orig_bat_iv {
/**
- * @bcast_own: set of bitfields (one per hard-interface) where each one
- * counts the number of our OGMs this orig_node rebroadcasted "back" to
- * us (relative to last_real_seqno). Every bitfield is
- * BATADV_TQ_LOCAL_WINDOW_SIZE bits long.
- */
- unsigned long *bcast_own;
-
- /** @bcast_own_sum: sum of bcast_own */
- u8 *bcast_own_sum;
-
- /**
- * @ogm_cnt_lock: lock protecting bcast_own, bcast_own_sum,
- * neigh_node->bat_iv.real_bits & neigh_node->bat_iv.real_packet_count
+ * @ogm_cnt_lock: lock protecting &batadv_orig_ifinfo_bat_iv.bcast_own,
+ * &batadv_orig_ifinfo_bat_iv.bcast_own_sum,
+ * &batadv_neigh_ifinfo_bat_iv.real_bits and
+ * &batadv_neigh_ifinfo_bat_iv.real_packet_count
*/
spinlock_t ogm_cnt_lock;
};
@@ -1597,9 +1602,6 @@ struct batadv_priv {
/** @batman_queue_left: number of remaining OGM packet slots */
atomic_t batman_queue_left;
- /** @num_ifaces: number of interfaces assigned to this mesh interface */
- unsigned int num_ifaces;
-
/** @mesh_obj: kobject for sysfs mesh subdirectory */
struct kobject *mesh_obj;
@@ -2179,28 +2181,6 @@ struct batadv_algo_neigh_ops {
* struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
*/
struct batadv_algo_orig_ops {
- /**
- * @free: free the resources allocated by the routing algorithm for an
- * orig_node object (optional)
- */
- void (*free)(struct batadv_orig_node *orig_node);
-
- /**
- * @add_if: ask the routing algorithm to apply the needed changes to the
- * orig_node due to a new hard-interface being added into the mesh
- * (optional)
- */
- int (*add_if)(struct batadv_orig_node *orig_node,
- unsigned int max_if_num);
-
- /**
- * @del_if: ask the routing algorithm to apply the needed changes to the
- * orig_node due to an hard-interface being removed from the mesh
- * (optional)
- */
- int (*del_if)(struct batadv_orig_node *orig_node,
- unsigned int max_if_num, unsigned int del_if_num);
-
#ifdef CONFIG_BATMAN_ADV_DEBUGFS
/** @print: print the originator table (optional) */
void (*print)(struct batadv_priv *priv, struct seq_file *seq,
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 7b3965861013..43c284158f63 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -489,9 +489,6 @@ static int bnep_session(void *arg)
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
- /* Ensure session->terminate is updated */
- smp_mb__before_atomic();
-
if (atomic_read(&s->terminate))
break;
/* RX */
@@ -512,6 +509,10 @@ static int bnep_session(void *arg)
break;
netif_wake_queue(dev);
+ /*
+ * wait_woken() performs the necessary memory barriers
+ * for us; see the header comment for this primitive.
+ */
wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
remove_wait_queue(sk_sleep(sk), &wait);
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 7f26a5a19ff6..07cfa3249f83 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -288,9 +288,6 @@ static int cmtp_session(void *arg)
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
- /* Ensure session->terminate is updated */
- smp_mb__before_atomic();
-
if (atomic_read(&session->terminate))
break;
if (sk->sk_state != BT_CONNECTED)
@@ -306,6 +303,10 @@ static int cmtp_session(void *arg)
cmtp_process_transmit(session);
+ /*
+ * wait_woken() performs the necessary memory barriers
+ * for us; see the header comment for this primitive.
+ */
wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
remove_wait_queue(sk_sleep(sk), &wait);
@@ -431,9 +432,10 @@ int cmtp_del_connection(struct cmtp_conndel_req *req)
/* Stop session thread */
atomic_inc(&session->terminate);
- /* Ensure session->terminate is updated */
- smp_mb__after_atomic();
-
+ /*
+ * See the comment preceding the call to wait_woken()
+ * in cmtp_session().
+ */
wake_up_interruptible(sk_sleep(session->sock->sk));
} else
err = -ENOENT;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 74b29c7d841c..7352fe85674b 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2839,6 +2839,20 @@ struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list,
return NULL;
}
+struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk(
+ struct list_head *bdaddr_list, bdaddr_t *bdaddr,
+ u8 type)
+{
+ struct bdaddr_list_with_irk *b;
+
+ list_for_each_entry(b, bdaddr_list, list) {
+ if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type)
+ return b;
+ }
+
+ return NULL;
+}
+
void hci_bdaddr_list_clear(struct list_head *bdaddr_list)
{
struct bdaddr_list *b, *n;
@@ -2871,6 +2885,35 @@ int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type)
return 0;
}
+int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr,
+ u8 type, u8 *peer_irk, u8 *local_irk)
+{
+ struct bdaddr_list_with_irk *entry;
+
+ if (!bacmp(bdaddr, BDADDR_ANY))
+ return -EBADF;
+
+ if (hci_bdaddr_list_lookup(list, bdaddr, type))
+ return -EEXIST;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ bacpy(&entry->bdaddr, bdaddr);
+ entry->bdaddr_type = type;
+
+ if (peer_irk)
+ memcpy(entry->peer_irk, peer_irk, 16);
+
+ if (local_irk)
+ memcpy(entry->local_irk, local_irk, 16);
+
+ list_add(&entry->list, list);
+
+ return 0;
+}
+
int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type)
{
struct bdaddr_list *entry;
@@ -2890,6 +2933,26 @@ int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type)
return 0;
}
+int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr,
+ u8 type)
+{
+ struct bdaddr_list_with_irk *entry;
+
+ if (!bacmp(bdaddr, BDADDR_ANY)) {
+ hci_bdaddr_list_clear(list);
+ return 0;
+ }
+
+ entry = hci_bdaddr_list_lookup_with_irk(list, bdaddr, type);
+ if (!entry)
+ return -ENOENT;
+
+ list_del(&entry->list);
+ kfree(entry);
+
+ return 0;
+}
+
/* This function requires the caller holds hdev->lock */
struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
bdaddr_t *addr, u8 addr_type)
@@ -3084,6 +3147,8 @@ struct hci_dev *hci_alloc_dev(void)
hdev->le_max_tx_time = 0x0148;
hdev->le_max_rx_len = 0x001b;
hdev->le_max_rx_time = 0x0148;
+ hdev->le_max_key_size = SMP_MAX_ENC_KEY_SIZE;
+ hdev->le_min_key_size = SMP_MIN_ENC_KEY_SIZE;
hdev->le_tx_def_phys = HCI_LE_SET_PHY_1M;
hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index f12555f23a49..f47f8fad757a 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1454,6 +1454,45 @@ static void hci_cc_le_write_def_data_len(struct hci_dev *hdev,
hdev->le_def_tx_time = le16_to_cpu(sent->tx_time);
}
+static void hci_cc_le_add_to_resolv_list(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_cp_le_add_to_resolv_list *sent;
+ __u8 status = *((__u8 *) skb->data);
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (status)
+ return;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_RESOLV_LIST);
+ if (!sent)
+ return;
+
+ hci_bdaddr_list_add_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
+ sent->bdaddr_type, sent->peer_irk,
+ sent->local_irk);
+}
+
+static void hci_cc_le_del_from_resolv_list(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_cp_le_del_from_resolv_list *sent;
+ __u8 status = *((__u8 *) skb->data);
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (status)
+ return;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_RESOLV_LIST);
+ if (!sent)
+ return;
+
+ hci_bdaddr_list_del_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
+ sent->bdaddr_type);
+}
+
static void hci_cc_le_clear_resolv_list(struct hci_dev *hdev,
struct sk_buff *skb)
{
@@ -3279,6 +3318,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
hci_cc_le_write_def_data_len(hdev, skb);
break;
+ case HCI_OP_LE_ADD_TO_RESOLV_LIST:
+ hci_cc_le_add_to_resolv_list(hdev, skb);
+ break;
+
+ case HCI_OP_LE_DEL_FROM_RESOLV_LIST:
+ hci_cc_le_del_from_resolv_list(hdev, skb);
+ break;
+
case HCI_OP_LE_CLEAR_RESOLV_LIST:
hci_cc_le_clear_resolv_list(hdev, skb);
break;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 253975cce943..3734dc1788b4 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1074,6 +1074,10 @@ static int hidp_session_start_sync(struct hidp_session *session)
static void hidp_session_terminate(struct hidp_session *session)
{
atomic_inc(&session->terminate);
+ /*
+ * See the comment preceding the call to wait_woken()
+ * in hidp_session_run().
+ */
wake_up_interruptible(&hidp_session_wq);
}
@@ -1193,8 +1197,6 @@ static void hidp_session_run(struct hidp_session *session)
* thread is woken up by ->sk_state_changed().
*/
- /* Ensure session->terminate is updated */
- smp_mb__before_atomic();
if (atomic_read(&session->terminate))
break;
@@ -1228,14 +1230,15 @@ static void hidp_session_run(struct hidp_session *session)
hidp_process_transmit(session, &session->ctrl_transmit,
session->ctrl_sock);
+ /*
+ * wait_woken() performs the necessary memory barriers
+ * for us; see the header comment for this primitive.
+ */
wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
remove_wait_queue(&hidp_session_wq, &wait);
atomic_inc(&session->terminate);
-
- /* Ensure session->terminate is updated */
- smp_mb__after_atomic();
}
static int hidp_session_wake_function(wait_queue_entry_t *wait,
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index d17a4736e47c..514899f7f0d4 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -51,9 +51,6 @@ static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN | L2CAP_FEAT_UCD;
static LIST_HEAD(chan_list);
static DEFINE_RWLOCK(chan_list_lock);
-static u16 le_max_credits = L2CAP_LE_MAX_CREDITS;
-static u16 le_default_mps = L2CAP_LE_DEFAULT_MPS;
-
static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
u8 code, u8 ident, u16 dlen, void *data);
static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len,
@@ -519,8 +516,10 @@ static void l2cap_le_flowctl_init(struct l2cap_chan *chan)
chan->sdu_last_frag = NULL;
chan->sdu_len = 0;
chan->tx_credits = 0;
- chan->rx_credits = le_max_credits;
- chan->mps = min_t(u16, chan->imtu, le_default_mps);
+ /* Derive MPS from connection MTU to stop HCI fragmentation */
+ chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE);
+ /* Give enough credits for a full packet */
+ chan->rx_credits = (chan->imtu / chan->mps) + 1;
skb_queue_head_init(&chan->tx_q);
}
@@ -1282,6 +1281,8 @@ static void l2cap_le_connect(struct l2cap_chan *chan)
if (test_and_set_bit(FLAG_LE_CONN_REQ_SENT, &chan->flags))
return;
+ l2cap_le_flowctl_init(chan);
+
req.psm = chan->psm;
req.scid = cpu_to_le16(chan->scid);
req.mtu = cpu_to_le16(chan->imtu);
@@ -5493,8 +5494,6 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
goto response_unlock;
}
- l2cap_le_flowctl_init(chan);
-
bacpy(&chan->src, &conn->hcon->src);
bacpy(&chan->dst, &conn->hcon->dst);
chan->src_type = bdaddr_src_type(conn->hcon);
@@ -5506,6 +5505,9 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
chan->tx_credits = __le16_to_cpu(req->credits);
__l2cap_chan_add(conn, chan);
+
+ l2cap_le_flowctl_init(chan);
+
dcid = chan->scid;
credits = chan->rx_credits;
@@ -6699,13 +6701,10 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan)
struct l2cap_le_credits pkt;
u16 return_credits;
- /* We return more credits to the sender only after the amount of
- * credits falls below half of the initial amount.
- */
- if (chan->rx_credits >= (le_max_credits + 1) / 2)
- return;
+ return_credits = ((chan->imtu / chan->mps) + 1) - chan->rx_credits;
- return_credits = le_max_credits - chan->rx_credits;
+ if (!return_credits)
+ return;
BT_DBG("chan %p returning %u credits to sender", chan, return_credits);
@@ -6719,6 +6718,21 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan)
l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt);
}
+static int l2cap_le_recv(struct l2cap_chan *chan, struct sk_buff *skb)
+{
+ int err;
+
+ BT_DBG("SDU reassemble complete: chan %p skb->len %u", chan, skb->len);
+
+ /* Wait for recv to confirm reception before updating the credits */
+ err = chan->ops->recv(chan, skb);
+
+ /* Update credits whenever an SDU is received */
+ l2cap_chan_le_send_credits(chan);
+
+ return err;
+}
+
static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
{
int err;
@@ -6737,7 +6751,11 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
chan->rx_credits--;
BT_DBG("rx_credits %u -> %u", chan->rx_credits + 1, chan->rx_credits);
- l2cap_chan_le_send_credits(chan);
+ /* Update if remote had run out of credits, this should only happen
+ * if the remote is not using the entire MPS.
+ */
+ if (!chan->rx_credits)
+ l2cap_chan_le_send_credits(chan);
err = 0;
@@ -6763,12 +6781,22 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
}
if (skb->len == sdu_len)
- return chan->ops->recv(chan, skb);
+ return l2cap_le_recv(chan, skb);
chan->sdu = skb;
chan->sdu_len = sdu_len;
chan->sdu_last_frag = skb;
+ /* Detect if remote is not able to use the selected MPS */
+ if (skb->len + L2CAP_SDULEN_SIZE < chan->mps) {
+ u16 mps_len = skb->len + L2CAP_SDULEN_SIZE;
+
+ /* Adjust the number of credits */
+ BT_DBG("chan->mps %u -> %u", chan->mps, mps_len);
+ chan->mps = mps_len;
+ l2cap_chan_le_send_credits(chan);
+ }
+
return 0;
}
@@ -6785,7 +6813,7 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
skb = NULL;
if (chan->sdu->len == chan->sdu_len) {
- err = chan->ops->recv(chan, chan->sdu);
+ err = l2cap_le_recv(chan, chan->sdu);
if (!err) {
chan->sdu = NULL;
chan->sdu_last_frag = NULL;
@@ -7102,7 +7130,6 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
case L2CAP_MODE_BASIC:
break;
case L2CAP_MODE_LE_FLOWCTL:
- l2cap_le_flowctl_init(chan);
break;
case L2CAP_MODE_ERTM:
case L2CAP_MODE_STREAMING:
@@ -7645,11 +7672,6 @@ int __init l2cap_init(void)
l2cap_debugfs = debugfs_create_file("l2cap", 0444, bt_debugfs,
NULL, &l2cap_debugfs_fops);
- debugfs_create_u16("l2cap_le_max_credits", 0644, bt_debugfs,
- &le_max_credits);
- debugfs_create_u16("l2cap_le_default_mps", 0644, bt_debugfs,
- &le_default_mps);
-
return 0;
}
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 73f7211d0431..a1c1b7e8a45c 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -88,9 +88,6 @@ struct smp_dev {
u8 local_rand[16];
bool debug_key;
- u8 min_key_size;
- u8 max_key_size;
-
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
struct crypto_kpp *tfm_ecdh;
@@ -720,7 +717,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
if (rsp == NULL) {
req->io_capability = conn->hcon->io_capability;
req->oob_flag = oob_flag;
- req->max_key_size = SMP_DEV(hdev)->max_key_size;
+ req->max_key_size = hdev->le_max_key_size;
req->init_key_dist = local_dist;
req->resp_key_dist = remote_dist;
req->auth_req = (authreq & AUTH_REQ_MASK(hdev));
@@ -731,7 +728,7 @@ static void build_pairing_cmd(struct l2cap_conn *conn,
rsp->io_capability = conn->hcon->io_capability;
rsp->oob_flag = oob_flag;
- rsp->max_key_size = SMP_DEV(hdev)->max_key_size;
+ rsp->max_key_size = hdev->le_max_key_size;
rsp->init_key_dist = req->init_key_dist & remote_dist;
rsp->resp_key_dist = req->resp_key_dist & local_dist;
rsp->auth_req = (authreq & AUTH_REQ_MASK(hdev));
@@ -745,7 +742,7 @@ static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size)
struct hci_dev *hdev = conn->hcon->hdev;
struct smp_chan *smp = chan->data;
- if (max_key_size > SMP_DEV(hdev)->max_key_size ||
+ if (max_key_size > hdev->le_max_key_size ||
max_key_size < SMP_MIN_ENC_KEY_SIZE)
return SMP_ENC_KEY_SIZE;
@@ -3264,8 +3261,6 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
smp->tfm_aes = tfm_aes;
smp->tfm_cmac = tfm_cmac;
smp->tfm_ecdh = tfm_ecdh;
- smp->min_key_size = SMP_MIN_ENC_KEY_SIZE;
- smp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
create_chan:
chan = l2cap_chan_create();
@@ -3391,7 +3386,7 @@ static ssize_t le_min_key_size_read(struct file *file,
struct hci_dev *hdev = file->private_data;
char buf[4];
- snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->min_key_size);
+ snprintf(buf, sizeof(buf), "%2u\n", hdev->le_min_key_size);
return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
}
@@ -3412,11 +3407,11 @@ static ssize_t le_min_key_size_write(struct file *file,
sscanf(buf, "%hhu", &key_size);
- if (key_size > SMP_DEV(hdev)->max_key_size ||
+ if (key_size > hdev->le_max_key_size ||
key_size < SMP_MIN_ENC_KEY_SIZE)
return -EINVAL;
- SMP_DEV(hdev)->min_key_size = key_size;
+ hdev->le_min_key_size = key_size;
return count;
}
@@ -3435,7 +3430,7 @@ static ssize_t le_max_key_size_read(struct file *file,
struct hci_dev *hdev = file->private_data;
char buf[4];
- snprintf(buf, sizeof(buf), "%2u\n", SMP_DEV(hdev)->max_key_size);
+ snprintf(buf, sizeof(buf), "%2u\n", hdev->le_max_key_size);
return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
}
@@ -3457,10 +3452,10 @@ static ssize_t le_max_key_size_write(struct file *file,
sscanf(buf, "%hhu", &key_size);
if (key_size > SMP_MAX_ENC_KEY_SIZE ||
- key_size < SMP_DEV(hdev)->min_key_size)
+ key_size < hdev->le_min_key_size)
return -EINVAL;
- SMP_DEV(hdev)->max_key_size = key_size;
+ hdev->le_max_key_size = key_size;
return count;
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f4078830ea50..0c423b8cd75c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -12,7 +12,7 @@
#include <linux/sched/signal.h>
static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
- struct bpf_cgroup_storage *storage)
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
{
u32 ret;
@@ -28,13 +28,20 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
{
- struct bpf_cgroup_storage *storage = NULL;
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
+ enum bpf_cgroup_storage_type stype;
u64 time_start, time_spent = 0;
u32 ret = 0, i;
- storage = bpf_cgroup_storage_alloc(prog);
- if (IS_ERR(storage))
- return PTR_ERR(storage);
+ for_each_cgroup_storage_type(stype) {
+ storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+ if (IS_ERR(storage[stype])) {
+ storage[stype] = NULL;
+ for_each_cgroup_storage_type(stype)
+ bpf_cgroup_storage_free(storage[stype]);
+ return -ENOMEM;
+ }
+ }
if (!repeat)
repeat = 1;
@@ -53,7 +60,8 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
do_div(time_spent, repeat);
*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
- bpf_cgroup_storage_free(storage);
+ for_each_cgroup_storage_type(stype)
+ bpf_cgroup_storage_free(storage[stype]);
return ret;
}
diff --git a/net/bridge/br.c b/net/bridge/br.c
index b0a0b82e2d91..e411e40333e2 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -175,6 +175,22 @@ static struct notifier_block br_switchdev_notifier = {
.notifier_call = br_switchdev_event,
};
+/* Switch bridge option @opt on or off.
+ *
+ * Every request is reported through br_debug(), including requests that do
+ * not change anything. The bit in br->options is only written when the
+ * requested state differs from the current one.
+ */
+void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
+{
+	bool was_on = !!br_opt_get(br, opt);
+
+	br_debug(br, "toggle option: %d state: %d -> %d\n",
+		 opt, was_on, on);
+
+	if (was_on != on) {
+		if (on)
+			set_bit(opt, &br->options);
+		else
+			clear_bit(opt, &br->options);
+	}
+}
+
static void __net_exit br_net_exit(struct net *net)
{
struct net_device *dev;
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index 2cf7716254be..6b78e6351719 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -39,7 +39,7 @@ void br_recalculate_neigh_suppress_enabled(struct net_bridge *br)
}
}
- br->neigh_suppress_enabled = neigh_suppress;
+ br_opt_toggle(br, BROPT_NEIGH_SUPPRESS_ENABLED, neigh_suppress);
}
#if IS_ENABLED(CONFIG_INET)
@@ -155,7 +155,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
ipv4_is_multicast(tip))
return;
- if (br->neigh_suppress_enabled) {
+ if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
if (p && (p->flags & BR_NEIGH_SUPPRESS))
return;
if (ipv4_is_zeronet(sip) || sip == tip) {
@@ -175,7 +175,8 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
return;
}
- if (br->neigh_suppress_enabled && br_is_local_ip(vlandev, tip)) {
+ if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
+ br_is_local_ip(vlandev, tip)) {
/* its our local ip, so don't proxy reply
* and don't forward to neigh suppress ports
*/
@@ -213,7 +214,8 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
/* If we have replied or as long as we know the
* mac, indicate to arp replied
*/
- if (replied || br->neigh_suppress_enabled)
+ if (replied ||
+ br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED))
BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
}
@@ -311,7 +313,7 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
/* Neighbor Advertisement */
memset(na, 0, sizeof(*na) + na_olen);
na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
- na->icmph.icmp6_router = 0; /* XXX: should be 1 ? */
+ na->icmph.icmp6_router = (n->flags & NTF_ROUTER) ? 1 : 0;
na->icmph.icmp6_override = 1;
na->icmph.icmp6_solicited = 1;
na->target = ns->target;
@@ -460,7 +462,8 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
* mac, indicate to NEIGH_SUPPRESS ports that we
* have replied
*/
- if (replied || br->neigh_suppress_enabled)
+ if (replied ||
+ br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED))
BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
}
neigh_release(n);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index e682a668ce57..e053a4e43758 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -67,11 +67,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ENABLED(CONFIG_INET) &&
(eth->h_proto == htons(ETH_P_ARP) ||
eth->h_proto == htons(ETH_P_RARP)) &&
- br->neigh_suppress_enabled) {
+ br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
br_do_proxy_suppress_arp(skb, br, vid, NULL);
} else if (IS_ENABLED(CONFIG_IPV6) &&
skb->protocol == htons(ETH_P_IPV6) &&
- br->neigh_suppress_enabled &&
+ br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
pskb_may_pull(skb, sizeof(struct ipv6hdr) +
sizeof(struct nd_msg)) &&
ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
@@ -228,7 +228,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
dev->mtu = new_mtu;
/* this flag will be cleared if the MTU was automatically adjusted */
- br->mtu_set_by_user = true;
+ br_opt_toggle(br, BROPT_MTU_SET_BY_USER, true);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* remember the MTU in the rtable for PMTU */
dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 502f66349530..74331690a390 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -504,6 +504,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
fdb->added_by_user = 0;
fdb->added_by_external_learn = 0;
fdb->offloaded = 0;
+ fdb->is_sticky = 0;
fdb->updated = fdb->used = jiffies;
if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl,
&fdb->rhnode,
@@ -584,7 +585,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
unsigned long now = jiffies;
/* fastpath: update of existing entry */
- if (unlikely(source != fdb->dst)) {
+ if (unlikely(source != fdb->dst && !fdb->is_sticky)) {
fdb->dst = source;
fdb_modified = true;
/* Take over HW learned entry */
@@ -656,6 +657,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
ndm->ndm_flags |= NTF_OFFLOADED;
if (fdb->added_by_external_learn)
ndm->ndm_flags |= NTF_EXT_LEARNED;
+ if (fdb->is_sticky)
+ ndm->ndm_flags |= NTF_STICKY;
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr))
goto nla_put_failure;
@@ -772,8 +775,10 @@ skip:
/* Update (create or replace) forwarding database entry */
static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
- const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
+ const u8 *addr, u16 state, u16 flags, u16 vid,
+ u8 ndm_flags)
{
+ u8 is_sticky = !!(ndm_flags & NTF_STICKY);
struct net_bridge_fdb_entry *fdb;
bool modified = false;
@@ -789,6 +794,9 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
return -EINVAL;
}
+ if (is_sticky && (state & NUD_PERMANENT))
+ return -EINVAL;
+
fdb = br_fdb_find(br, addr, vid);
if (fdb == NULL) {
if (!(flags & NLM_F_CREATE))
@@ -832,6 +840,12 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
modified = true;
}
+
+ if (is_sticky != fdb->is_sticky) {
+ fdb->is_sticky = is_sticky;
+ modified = true;
+ }
+
fdb->added_by_user = 1;
fdb->used = jiffies;
@@ -865,7 +879,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
} else {
spin_lock_bh(&br->hash_lock);
err = fdb_add_entry(br, p, addr, ndm->ndm_state,
- nlh_flags, vid);
+ nlh_flags, vid, ndm->ndm_flags);
spin_unlock_bh(&br->hash_lock);
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 0363f1bdc401..9b46d2dc4c22 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -394,8 +394,7 @@ static int find_portno(struct net_bridge *br)
struct net_bridge_port *p;
unsigned long *inuse;
- inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
- GFP_KERNEL);
+ inuse = bitmap_zalloc(BR_MAX_PORTS, GFP_KERNEL);
if (!inuse)
return -ENOMEM;
@@ -404,7 +403,7 @@ static int find_portno(struct net_bridge *br)
set_bit(p->port_no, inuse);
}
index = find_first_zero_bit(inuse, BR_MAX_PORTS);
- kfree(inuse);
+ bitmap_free(inuse);
return (index >= BR_MAX_PORTS) ? -EXFULL : index;
}
@@ -509,14 +508,14 @@ void br_mtu_auto_adjust(struct net_bridge *br)
ASSERT_RTNL();
/* if the bridge MTU was manually configured don't mess with it */
- if (br->mtu_set_by_user)
+ if (br_opt_get(br, BROPT_MTU_SET_BY_USER))
return;
/* change to the minimum MTU and clear the flag which was set by
* the bridge ndo_change_mtu callback
*/
dev_set_mtu(br->dev, br_mtu_min(br));
- br->mtu_set_by_user = false;
+ br_opt_toggle(br, BROPT_MTU_SET_BY_USER, false);
}
static void br_set_gso_limits(struct net_bridge *br)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 72074276c088..3ddca11f44c2 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -122,7 +122,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
br_do_proxy_suppress_arp(skb, br, vid, p);
} else if (IS_ENABLED(CONFIG_IPV6) &&
skb->protocol == htons(ETH_P_IPV6) &&
- br->neigh_suppress_enabled &&
+ br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED) &&
pskb_may_pull(skb, sizeof(struct ipv6hdr) +
sizeof(struct nd_msg)) &&
ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 6d9f48bd374a..a7ea2d431714 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -84,7 +84,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
int i, err = 0;
int idx = 0, s_idx = cb->args[1];
- if (br->multicast_disabled)
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;
mdb = rcu_dereference(br->mdb);
@@ -162,6 +162,29 @@ out:
return err;
}
+/* Validate the header of an mdb dump request.
+ *
+ * Returns 0 when the message carries exactly one struct br_port_msg with a
+ * zero ifindex and nothing after it; otherwise sets an extack message and
+ * returns -EINVAL.
+ */
+static int br_mdb_valid_dump_req(const struct nlmsghdr *nlh,
+				 struct netlink_ext_ack *extack)
+{
+	const struct br_port_msg *hdr;
+
+	/* The message must be large enough to hold the fixed header. */
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*hdr))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for mdb dump request");
+		return -EINVAL;
+	}
+
+	hdr = nlmsg_data(nlh);
+	if (hdr->ifindex) {
+		NL_SET_ERR_MSG_MOD(extack, "Filtering by device index is not supported for mdb dump request");
+		return -EINVAL;
+	}
+
+	/* No attributes may follow the header. */
+	if (nlmsg_attrlen(nlh, sizeof(*hdr))) {
+		NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net_device *dev;
@@ -169,6 +192,13 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct nlmsghdr *nlh = NULL;
int idx = 0, s_idx;
+ if (cb->strict_check) {
+ int err = br_mdb_valid_dump_req(cb->nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
s_idx = cb->args[0];
rcu_read_lock();
@@ -598,7 +628,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
struct net_bridge_port *p;
int ret;
- if (!netif_running(br->dev) || br->multicast_disabled)
+ if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
return -EINVAL;
dev = __dev_get_by_index(net, entry->ifindex);
@@ -673,7 +703,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
struct br_ip ip;
int err = -EINVAL;
- if (!netif_running(br->dev) || br->multicast_disabled)
+ if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
return -EINVAL;
__mdb_entry_to_br_ip(entry, &ip);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 20ed7adcf1cc..024139b51d3a 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -158,7 +158,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb);
struct br_ip ip;
- if (br->multicast_disabled)
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return NULL;
if (BR_INPUT_SKB_CB(skb)->igmp)
@@ -411,7 +411,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
iph->frag_off = htons(IP_DF);
iph->ttl = 1;
iph->protocol = IPPROTO_IGMP;
- iph->saddr = br->multicast_query_use_ifaddr ?
+ iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0;
iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
((u8 *)&iph[1])[0] = IPOPT_RA;
@@ -503,11 +503,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
&ip6h->saddr)) {
kfree_skb(skb);
- br->has_ipv6_addr = 0;
+ br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, false);
return NULL;
}
- br->has_ipv6_addr = 1;
+ br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
hopopt = (u8 *)(ip6h + 1);
@@ -628,7 +628,7 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
port ? port->dev->name : br->dev->name);
err = -E2BIG;
disable:
- br->multicast_disabled = 1;
+ br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
goto err;
}
}
@@ -894,7 +894,7 @@ static void br_multicast_querier_expired(struct net_bridge *br,
struct bridge_mcast_own_query *query)
{
spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) || br->multicast_disabled)
+ if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
goto out;
br_multicast_start_querier(br, query);
@@ -965,8 +965,9 @@ static void br_multicast_send_query(struct net_bridge *br,
struct br_ip br_group;
unsigned long time;
- if (!netif_running(br->dev) || br->multicast_disabled ||
- !br->multicast_querier)
+ if (!netif_running(br->dev) ||
+ !br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
+ !br_opt_get(br, BROPT_MULTICAST_QUERIER))
return;
memset(&br_group.u, 0, sizeof(br_group.u));
@@ -1036,7 +1037,7 @@ static void br_mc_disabled_update(struct net_device *dev, bool value)
.orig_dev = dev,
.id = SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
.flags = SWITCHDEV_F_DEFER,
- .u.mc_disabled = value,
+ .u.mc_disabled = !value,
};
switchdev_port_attr_set(dev, &attr);
@@ -1054,7 +1055,8 @@ int br_multicast_add_port(struct net_bridge_port *port)
timer_setup(&port->ip6_own_query.timer,
br_ip6_multicast_port_query_expired, 0);
#endif
- br_mc_disabled_update(port->dev, port->br->multicast_disabled);
+ br_mc_disabled_update(port->dev,
+ br_opt_get(port->br, BROPT_MULTICAST_ENABLED));
port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats);
if (!port->mcast_stats)
@@ -1091,7 +1093,7 @@ static void __br_multicast_enable_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
- if (br->multicast_disabled || !netif_running(br->dev))
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || !netif_running(br->dev))
return;
br_multicast_enable(&port->ip4_own_query);
@@ -1634,7 +1636,7 @@ br_multicast_leave_group(struct net_bridge *br,
if (timer_pending(&other_query->timer))
goto out;
- if (br->multicast_querier) {
+ if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
__br_multicast_send_query(br, port, &mp->addr);
time = jiffies + br->multicast_last_member_count *
@@ -1746,7 +1748,7 @@ static void br_multicast_err_count(const struct net_bridge *br,
struct bridge_mcast_stats __percpu *stats;
struct bridge_mcast_stats *pstats;
- if (!br->multicast_stats_enabled)
+ if (!br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED))
return;
if (p)
@@ -1904,7 +1906,7 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
BR_INPUT_SKB_CB(skb)->igmp = 0;
BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
- if (br->multicast_disabled)
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;
switch (skb->protocol) {
@@ -1956,8 +1958,6 @@ void br_multicast_init(struct net_bridge *br)
br->hash_max = 512;
br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
- br->multicast_querier = 0;
- br->multicast_query_use_ifaddr = 0;
br->multicast_last_member_count = 2;
br->multicast_startup_query_count = 2;
@@ -1976,7 +1976,8 @@ void br_multicast_init(struct net_bridge *br)
br->ip6_other_query.delay_time = 0;
br->ip6_querier.port = NULL;
#endif
- br->has_ipv6_addr = 1;
+ br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true);
+ br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
spin_lock_init(&br->multicast_lock);
timer_setup(&br->multicast_router_timer,
@@ -1998,7 +1999,7 @@ static void __br_multicast_open(struct net_bridge *br,
{
query->startup_sent = 0;
- if (br->multicast_disabled)
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return;
mod_timer(&query->timer, jiffies);
@@ -2173,12 +2174,12 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
int err = 0;
spin_lock_bh(&br->multicast_lock);
- if (br->multicast_disabled == !val)
+ if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val)
goto unlock;
- br_mc_disabled_update(br->dev, !val);
- br->multicast_disabled = !val;
- if (br->multicast_disabled)
+ br_mc_disabled_update(br->dev, val);
+ br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val);
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
goto unlock;
if (!netif_running(br->dev))
@@ -2189,7 +2190,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
if (mdb->old) {
err = -EEXIST;
rollback:
- br->multicast_disabled = !!val;
+ br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
goto unlock;
}
@@ -2213,7 +2214,7 @@ bool br_multicast_enabled(const struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
- return !br->multicast_disabled;
+ return !!br_opt_get(br, BROPT_MULTICAST_ENABLED);
}
EXPORT_SYMBOL_GPL(br_multicast_enabled);
@@ -2236,10 +2237,10 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
val = !!val;
spin_lock_bh(&br->multicast_lock);
- if (br->multicast_querier == val)
+ if (br_opt_get(br, BROPT_MULTICAST_QUERIER) == val)
goto unlock;
- br->multicast_querier = val;
+ br_opt_toggle(br, BROPT_MULTICAST_QUERIER, !!val);
if (!val)
goto unlock;
@@ -2560,7 +2561,7 @@ void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
struct bridge_mcast_stats __percpu *stats;
/* if multicast_disabled is true then igmp type can't be set */
- if (!type || !br->multicast_stats_enabled)
+ if (!type || !br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED))
return;
if (p)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 37278dc280eb..b1b5e8516724 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -487,14 +487,15 @@ static unsigned int br_nf_pre_routing(void *priv,
br = p->br;
if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) {
- if (!brnf_call_ip6tables && !br->nf_call_ip6tables)
+ if (!brnf_call_ip6tables &&
+ !br_opt_get(br, BROPT_NF_CALL_IP6TABLES))
return NF_ACCEPT;
nf_bridge_pull_encap_header_rcsum(skb);
return br_nf_pre_routing_ipv6(priv, skb, state);
}
- if (!brnf_call_iptables && !br->nf_call_iptables)
+ if (!brnf_call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES))
return NF_ACCEPT;
if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb))
@@ -636,7 +637,7 @@ static unsigned int br_nf_forward_arp(void *priv,
return NF_ACCEPT;
br = p->br;
- if (!brnf_call_arptables && !br->nf_call_arptables)
+ if (!brnf_call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES))
return NF_ACCEPT;
if (!IS_ARP(skb)) {
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index ec2b58a09f76..3345f1984542 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1034,6 +1034,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 },
[IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
[IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 },
+ [IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1114,6 +1115,14 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (err)
return err;
}
+
+ if (data[IFLA_BR_VLAN_STATS_PER_PORT]) {
+ __u8 per_port = nla_get_u8(data[IFLA_BR_VLAN_STATS_PER_PORT]);
+
+ err = br_vlan_set_stats_per_port(br, per_port);
+ if (err)
+ return err;
+ }
#endif
if (data[IFLA_BR_GROUP_FWD_MASK]) {
@@ -1139,7 +1148,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
spin_lock_bh(&br->lock);
memcpy(br->group_addr, new_addr, sizeof(br->group_addr));
spin_unlock_bh(&br->lock);
- br->group_addr_set = true;
+ br_opt_toggle(br, BROPT_GROUP_ADDR_SET, true);
br_recalculate_fwd_mask(br);
}
@@ -1167,7 +1176,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
u8 val;
val = nla_get_u8(data[IFLA_BR_MCAST_QUERY_USE_IFADDR]);
- br->multicast_query_use_ifaddr = !!val;
+ br_opt_toggle(br, BROPT_MULTICAST_QUERY_USE_IFADDR, !!val);
}
if (data[IFLA_BR_MCAST_QUERIER]) {
@@ -1244,7 +1253,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
__u8 mcast_stats;
mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]);
- br->multicast_stats_enabled = !!mcast_stats;
+ br_opt_toggle(br, BROPT_MULTICAST_STATS_ENABLED, !!mcast_stats);
}
if (data[IFLA_BR_MCAST_IGMP_VERSION]) {
@@ -1271,19 +1280,19 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_NF_CALL_IPTABLES]) {
u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IPTABLES]);
- br->nf_call_iptables = val ? true : false;
+ br_opt_toggle(br, BROPT_NF_CALL_IPTABLES, !!val);
}
if (data[IFLA_BR_NF_CALL_IP6TABLES]) {
u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IP6TABLES]);
- br->nf_call_ip6tables = val ? true : false;
+ br_opt_toggle(br, BROPT_NF_CALL_IP6TABLES, !!val);
}
if (data[IFLA_BR_NF_CALL_ARPTABLES]) {
u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_ARPTABLES]);
- br->nf_call_arptables = val ? true : false;
+ br_opt_toggle(br, BROPT_NF_CALL_ARPTABLES, !!val);
}
#endif
@@ -1327,6 +1336,7 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */
nla_total_size(sizeof(u16)) + /* IFLA_BR_VLAN_DEFAULT_PVID */
nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_STATS_ENABLED */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_STATS_PER_PORT */
#endif
nla_total_size(sizeof(u16)) + /* IFLA_BR_GROUP_FWD_MASK */
nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_ROOT_ID */
@@ -1416,17 +1426,22 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ /* br_opt_get() takes a BROPT_* bit number of br->options; the IFLA_BR_*
+ * constants are only the netlink attribute types being filled in.
+ */
if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto) ||
nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid) ||
- nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED, br->vlan_stats_enabled))
+ nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED,
+ br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) ||
+ nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT,
+ br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)))
return -EMSGSIZE;
#endif
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) ||
- nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING, !br->multicast_disabled) ||
+ nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING,
+ br_opt_get(br, BROPT_MULTICAST_ENABLED)) ||
nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR,
- br->multicast_query_use_ifaddr) ||
- nla_put_u8(skb, IFLA_BR_MCAST_QUERIER, br->multicast_querier) ||
+ br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR)) ||
+ nla_put_u8(skb, IFLA_BR_MCAST_QUERIER,
+ br_opt_get(br, BROPT_MULTICAST_QUERIER)) ||
nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED,
- br->multicast_stats_enabled) ||
+ br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) ||
nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY,
br->hash_elasticity) ||
nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
@@ -1469,11 +1484,11 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_u8(skb, IFLA_BR_NF_CALL_IPTABLES,
- br->nf_call_iptables ? 1 : 0) ||
+ br_opt_get(br, BROPT_NF_CALL_IPTABLES) ? 1 : 0) ||
nla_put_u8(skb, IFLA_BR_NF_CALL_IP6TABLES,
- br->nf_call_ip6tables ? 1 : 0) ||
+ br_opt_get(br, BROPT_NF_CALL_IP6TABLES) ? 1 : 0) ||
nla_put_u8(skb, IFLA_BR_NF_CALL_ARPTABLES,
- br->nf_call_arptables ? 1 : 0))
+ br_opt_get(br, BROPT_NF_CALL_ARPTABLES) ? 1 : 0))
return -EMSGSIZE;
#endif
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 11ed2029985f..10ee39fdca5c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -54,14 +54,12 @@ typedef struct bridge_id bridge_id;
typedef struct mac_addr mac_addr;
typedef __u16 port_id;
-struct bridge_id
-{
+struct bridge_id {
unsigned char prio[2];
unsigned char addr[ETH_ALEN];
};
-struct mac_addr
-{
+struct mac_addr {
unsigned char addr[ETH_ALEN];
};
@@ -181,6 +179,7 @@ struct net_bridge_fdb_entry {
struct hlist_node fdb_node;
unsigned char is_local:1,
is_static:1,
+ is_sticky:1,
added_by_user:1,
added_by_external_learn:1,
offloaded:1;
@@ -206,8 +205,7 @@ struct net_bridge_port_group {
unsigned char eth_addr[ETH_ALEN];
};
-struct net_bridge_mdb_entry
-{
+struct net_bridge_mdb_entry {
struct hlist_node hlist[2];
struct net_bridge *br;
struct net_bridge_port_group __rcu *ports;
@@ -217,8 +215,7 @@ struct net_bridge_mdb_entry
bool host_joined;
};
-struct net_bridge_mdb_htable
-{
+struct net_bridge_mdb_htable {
struct hlist_head *mhash;
struct rcu_head rcu;
struct net_bridge_mdb_htable *old;
@@ -309,16 +306,32 @@ static inline struct net_bridge_port *br_port_get_rtnl_rcu(const struct net_devi
rcu_dereference_rtnl(dev->rx_handler_data) : NULL;
}
+enum net_bridge_opts { /* bit numbers in net_bridge::options; read with br_opt_get(), written with br_opt_toggle() */
+ BROPT_VLAN_ENABLED,
+ BROPT_VLAN_STATS_ENABLED,
+ BROPT_NF_CALL_IPTABLES,
+ BROPT_NF_CALL_IP6TABLES,
+ BROPT_NF_CALL_ARPTABLES,
+ BROPT_GROUP_ADDR_SET, /* set when group_addr is changed through netlink or sysfs */
+ BROPT_MULTICAST_ENABLED, /* positive form of the former multicast_disabled flag */
+ BROPT_MULTICAST_QUERIER,
+ BROPT_MULTICAST_QUERY_USE_IFADDR,
+ BROPT_MULTICAST_STATS_ENABLED,
+ BROPT_HAS_IPV6_ADDR, /* refreshed by br_ip6_multicast_alloc_query() */
+ BROPT_NEIGH_SUPPRESS_ENABLED, /* maintained by br_recalculate_neigh_suppress_enabled() */
+ BROPT_MTU_SET_BY_USER, /* set by br_change_mtu(), cleared by br_mtu_auto_adjust() */
+ BROPT_VLAN_STATS_PER_PORT,
+};
+
struct net_bridge {
spinlock_t lock;
spinlock_t hash_lock;
struct list_head port_list;
struct net_device *dev;
struct pcpu_sw_netstats __percpu *stats;
+ unsigned long options;
/* These fields are accessed on each packet */
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
- u8 vlan_enabled;
- u8 vlan_stats_enabled;
__be16 vlan_proto;
u16 default_pvid;
struct net_bridge_vlan_group __rcu *vlgrp;
@@ -330,9 +343,6 @@ struct net_bridge {
struct rtable fake_rtable;
struct rt6_info fake_rt6_info;
};
- bool nf_call_iptables;
- bool nf_call_ip6tables;
- bool nf_call_arptables;
#endif
u16 group_fwd_mask;
u16 group_fwd_mask_required;
@@ -340,7 +350,6 @@ struct net_bridge {
/* STP */
bridge_id designated_root;
bridge_id bridge_id;
- u32 root_path_cost;
unsigned char topology_change;
unsigned char topology_change_detected;
u16 root_port;
@@ -352,9 +361,9 @@ struct net_bridge {
unsigned long bridge_hello_time;
unsigned long bridge_forward_delay;
unsigned long bridge_ageing_time;
+ u32 root_path_cost;
u8 group_addr[ETH_ALEN];
- bool group_addr_set;
enum {
BR_NO_STP, /* no spanning tree */
@@ -363,13 +372,6 @@ struct net_bridge {
} stp_enabled;
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- unsigned char multicast_router;
-
- u8 multicast_disabled:1;
- u8 multicast_querier:1;
- u8 multicast_query_use_ifaddr:1;
- u8 has_ipv6_addr:1;
- u8 multicast_stats_enabled:1;
u32 hash_elasticity;
u32 hash_max;
@@ -378,7 +380,11 @@ struct net_bridge {
u32 multicast_startup_query_count;
u8 multicast_igmp_version;
-
+ u8 multicast_router;
+#if IS_ENABLED(CONFIG_IPV6)
+ u8 multicast_mld_version;
+#endif
+ spinlock_t multicast_lock;
unsigned long multicast_last_member_interval;
unsigned long multicast_membership_interval;
unsigned long multicast_querier_interval;
@@ -386,7 +392,6 @@ struct net_bridge {
unsigned long multicast_query_response_interval;
unsigned long multicast_startup_query_interval;
- spinlock_t multicast_lock;
struct net_bridge_mdb_htable __rcu *mdb;
struct hlist_head router_list;
@@ -399,7 +404,6 @@ struct net_bridge {
struct bridge_mcast_other_query ip6_other_query;
struct bridge_mcast_own_query ip6_own_query;
struct bridge_mcast_querier ip6_querier;
- u8 multicast_mld_version;
#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif
@@ -413,8 +417,6 @@ struct net_bridge {
#ifdef CONFIG_NET_SWITCHDEV
int offload_fwd_mark;
#endif
- bool neigh_suppress_enabled;
- bool mtu_set_by_user;
struct hlist_head fdb_list;
};
@@ -492,6 +494,14 @@ static inline bool br_vlan_should_use(const struct net_bridge_vlan *v)
return true;
}
+static inline int br_opt_get(const struct net_bridge *br,
+ enum net_bridge_opts opt)
+{
+ return test_bit(opt, &br->options); /* opt is a BROPT_* bit number of br->options; any other constant tests an unrelated bit */
+}
+
+void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on);
+
/* br_device.c */
void br_dev_setup(struct net_device *dev);
void br_dev_delete(struct net_device *dev, struct list_head *list);
@@ -698,8 +708,8 @@ __br_multicast_querier_exists(struct net_bridge *br,
{
bool own_querier_enabled;
- if (br->multicast_querier) {
- if (is_ipv6 && !br->has_ipv6_addr)
+ if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
+ if (is_ipv6 && !br_opt_get(br, BROPT_HAS_IPV6_ADDR))
own_querier_enabled = false;
else
own_querier_enabled = true;
@@ -850,6 +860,7 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
int __br_vlan_set_proto(struct net_bridge *br, __be16 proto);
int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
+int br_vlan_set_stats_per_port(struct net_bridge *br, unsigned long val);
int br_vlan_init(struct net_bridge *br);
int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 0318a69888d4..60182bef6341 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -303,7 +303,7 @@ static ssize_t group_addr_store(struct device *d,
ether_addr_copy(br->group_addr, new_addr);
spin_unlock_bh(&br->lock);
- br->group_addr_set = true;
+ br_opt_toggle(br, BROPT_GROUP_ADDR_SET, true);
br_recalculate_fwd_mask(br);
netdev_state_change(br->dev);
@@ -349,7 +349,7 @@ static ssize_t multicast_snooping_show(struct device *d,
char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%d\n", !br->multicast_disabled);
+ return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_ENABLED));
}
static ssize_t multicast_snooping_store(struct device *d,
@@ -365,12 +365,13 @@ static ssize_t multicast_query_use_ifaddr_show(struct device *d,
char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr);
+ return sprintf(buf, "%d\n",
+ br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR));
}
static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val)
{
- br->multicast_query_use_ifaddr = !!val;
+ br_opt_toggle(br, BROPT_MULTICAST_QUERY_USE_IFADDR, !!val);
return 0;
}
@@ -388,7 +389,7 @@ static ssize_t multicast_querier_show(struct device *d,
char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%d\n", br->multicast_querier);
+ return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_QUERIER));
}
static ssize_t multicast_querier_store(struct device *d,
@@ -636,12 +637,13 @@ static ssize_t multicast_stats_enabled_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_stats_enabled);
+ return sprintf(buf, "%d\n",
+ br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED));
}
static int set_stats_enabled(struct net_bridge *br, unsigned long val)
{
- br->multicast_stats_enabled = !!val;
+ br_opt_toggle(br, BROPT_MULTICAST_STATS_ENABLED, !!val);
return 0;
}
@@ -678,12 +680,12 @@ static ssize_t nf_call_iptables_show(
struct device *d, struct device_attribute *attr, char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->nf_call_iptables);
+ return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IPTABLES));
}
static int set_nf_call_iptables(struct net_bridge *br, unsigned long val)
{
- br->nf_call_iptables = val ? true : false;
+ br_opt_toggle(br, BROPT_NF_CALL_IPTABLES, !!val);
return 0;
}
@@ -699,12 +701,12 @@ static ssize_t nf_call_ip6tables_show(
struct device *d, struct device_attribute *attr, char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->nf_call_ip6tables);
+ return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IP6TABLES));
}
static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val)
{
- br->nf_call_ip6tables = val ? true : false;
+ br_opt_toggle(br, BROPT_NF_CALL_IP6TABLES, !!val);
return 0;
}
@@ -720,12 +722,12 @@ static ssize_t nf_call_arptables_show(
struct device *d, struct device_attribute *attr, char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->nf_call_arptables);
+ return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_ARPTABLES));
}
static int set_nf_call_arptables(struct net_bridge *br, unsigned long val)
{
- br->nf_call_arptables = val ? true : false;
+ br_opt_toggle(br, BROPT_NF_CALL_ARPTABLES, !!val);
return 0;
}
@@ -743,7 +745,7 @@ static ssize_t vlan_filtering_show(struct device *d,
char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%d\n", br->vlan_enabled);
+ return sprintf(buf, "%d\n", br_opt_get(br, BROPT_VLAN_ENABLED));
}
static ssize_t vlan_filtering_store(struct device *d,
@@ -791,7 +793,7 @@ static ssize_t vlan_stats_enabled_show(struct device *d,
char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->vlan_stats_enabled);
+ return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_ENABLED));
}
static ssize_t vlan_stats_enabled_store(struct device *d,
@@ -801,6 +803,22 @@ static ssize_t vlan_stats_enabled_store(struct device *d,
return store_bridge_parm(d, buf, len, br_vlan_set_stats);
}
static DEVICE_ATTR_RW(vlan_stats_enabled);
+
+static ssize_t vlan_stats_per_port_show(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+ return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_PER_PORT));
+}
+
+static ssize_t vlan_stats_per_port_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, br_vlan_set_stats_per_port);
+}
+static DEVICE_ATTR_RW(vlan_stats_per_port);
#endif
static struct attribute *bridge_attrs[] = {
@@ -854,6 +872,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_vlan_protocol.attr,
&dev_attr_default_pvid.attr,
&dev_attr_vlan_stats_enabled.attr,
+ &dev_attr_vlan_stats_per_port.attr,
#endif
NULL
};
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 7df269092103..9b707234e4ae 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -190,6 +190,19 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv)
}
}
+static void nbp_vlan_rcu_free(struct rcu_head *rcu)
+{
+ struct net_bridge_vlan *v;
+
+ v = container_of(rcu, struct net_bridge_vlan, rcu);
+ WARN_ON(br_vlan_is_master(v));
+ /* if we had per-port stats configured then free them here */
+ if (v->brvlan->stats != v->stats)
+ free_percpu(v->stats);
+ v->stats = NULL;
+ kfree(v);
+}
+
/* This is the shared VLAN add function which works for both ports and bridge
* devices. There are four possible calls to this function in terms of the
* vlan entry type:
@@ -245,7 +258,15 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
if (!masterv)
goto out_filt;
v->brvlan = masterv;
- v->stats = masterv->stats;
+ if (br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)) {
+ v->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats);
+ if (!v->stats) {
+ err = -ENOMEM;
+ goto out_filt;
+ }
+ } else {
+ v->stats = masterv->stats;
+ }
} else {
err = br_switchdev_port_vlan_add(dev, v->vid, flags);
if (err && err != -EOPNOTSUPP)
@@ -329,7 +350,7 @@ static int __vlan_del(struct net_bridge_vlan *v)
rhashtable_remove_fast(&vg->vlan_hash, &v->vnode,
br_vlan_rht_params);
__vlan_del_list(v);
- kfree_rcu(v, rcu);
+ call_rcu(&v->rcu, nbp_vlan_rcu_free);
}
br_vlan_put_master(masterv);
@@ -386,7 +407,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
return NULL;
}
}
- if (br->vlan_stats_enabled) {
+ if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
stats = this_cpu_ptr(v->stats);
u64_stats_update_begin(&stats->syncp);
stats->tx_bytes += skb->len;
@@ -475,14 +496,14 @@ static bool __allowed_ingress(const struct net_bridge *br,
skb->vlan_tci |= pvid;
/* if stats are disabled we can avoid the lookup */
- if (!br->vlan_stats_enabled)
+ if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED))
return true;
}
v = br_vlan_find(vg, *vid);
if (!v || !br_vlan_should_use(v))
goto drop;
- if (br->vlan_stats_enabled) {
+ if (br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
stats = this_cpu_ptr(v->stats);
u64_stats_update_begin(&stats->syncp);
stats->rx_bytes += skb->len;
@@ -504,7 +525,7 @@ bool br_allowed_ingress(const struct net_bridge *br,
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
*/
- if (!br->vlan_enabled) {
+ if (!br_opt_get(br, BROPT_VLAN_ENABLED)) {
BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
return true;
}
@@ -538,7 +559,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
struct net_bridge *br = p->br;
/* If filtering was disabled at input, let it pass. */
- if (!br->vlan_enabled)
+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
return true;
vg = nbp_vlan_group_rcu(p);
@@ -695,11 +716,12 @@ struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid)
/* Must be protected by RTNL. */
static void recalculate_group_addr(struct net_bridge *br)
{
- if (br->group_addr_set)
+ if (br_opt_get(br, BROPT_GROUP_ADDR_SET))
return;
spin_lock_bh(&br->lock);
- if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q)) {
+ if (!br_opt_get(br, BROPT_VLAN_ENABLED) ||
+ br->vlan_proto == htons(ETH_P_8021Q)) {
/* Bridge Group Address */
br->group_addr[5] = 0x00;
} else { /* vlan_enabled && ETH_P_8021AD */
@@ -712,7 +734,8 @@ static void recalculate_group_addr(struct net_bridge *br)
/* Must be protected by RTNL. */
void br_recalculate_fwd_mask(struct net_bridge *br)
{
- if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q))
+ if (!br_opt_get(br, BROPT_VLAN_ENABLED) ||
+ br->vlan_proto == htons(ETH_P_8021Q))
br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
else /* vlan_enabled && ETH_P_8021AD */
br->group_fwd_mask_required = BR_GROUPFWD_8021AD &
@@ -729,14 +752,14 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
};
int err;
- if (br->vlan_enabled == val)
+ if (br_opt_get(br, BROPT_VLAN_ENABLED) == !!val)
return 0;
err = switchdev_port_attr_set(br->dev, &attr);
if (err && err != -EOPNOTSUPP)
return err;
- br->vlan_enabled = val;
+ br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val);
br_manage_promisc(br);
recalculate_group_addr(br);
br_recalculate_fwd_mask(br);
@@ -753,7 +776,7 @@ bool br_vlan_enabled(const struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
- return !!br->vlan_enabled;
+ return br_opt_get(br, BROPT_VLAN_ENABLED);
}
EXPORT_SYMBOL_GPL(br_vlan_enabled);
@@ -819,7 +842,31 @@ int br_vlan_set_stats(struct net_bridge *br, unsigned long val)
switch (val) {
case 0:
case 1:
- br->vlan_stats_enabled = val;
+ br_opt_toggle(br, BROPT_VLAN_STATS_ENABLED, !!val);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int br_vlan_set_stats_per_port(struct net_bridge *br, unsigned long val)
+{
+ struct net_bridge_port *p;
+
+ /* allow to change the option if there are no port vlans configured */
+ list_for_each_entry(p, &br->port_list, list) {
+ struct net_bridge_vlan_group *vg = nbp_vlan_group(p);
+
+ if (vg->num_vlans)
+ return -EBUSY;
+ }
+
+ switch (val) {
+ case 0:
+ case 1:
+ br_opt_toggle(br, BROPT_VLAN_STATS_PER_PORT, !!val);
break;
default:
return -EINVAL;
@@ -877,8 +924,7 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
return 0;
}
- changed = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
- GFP_KERNEL);
+ changed = bitmap_zalloc(BR_MAX_PORTS, GFP_KERNEL);
if (!changed)
return -ENOMEM;
@@ -925,7 +971,7 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
br->default_pvid = pvid;
out:
- kfree(changed);
+ bitmap_free(changed);
return err;
err_port:
@@ -965,7 +1011,7 @@ int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val)
goto out;
/* Only allow default pvid change when filtering is disabled */
- if (br->vlan_enabled) {
+ if (br_opt_get(br, BROPT_VLAN_ENABLED)) {
pr_info_once("Please disable vlan filtering to change default_pvid\n");
err = -EPERM;
goto out;
@@ -1019,7 +1065,7 @@ int nbp_vlan_init(struct net_bridge_port *p)
.orig_dev = p->br->dev,
.id = SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
.flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
- .u.vlan_filtering = p->br->vlan_enabled,
+ .u.vlan_filtering = br_opt_get(p->br, BROPT_VLAN_ENABLED),
};
struct net_bridge_vlan_group *vg;
int ret = -ENOMEM;
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index b82440e1fcb4..a931a71ef6df 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -264,9 +264,6 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
frontpkt = rearpkt;
rearpkt = NULL;
- err = -ENOMEM;
- if (frontpkt == NULL)
- goto out;
err = -EPROTO;
if (cfpkt_add_head(frontpkt, head, 6) < 0)
goto out;
diff --git a/net/core/dev.c b/net/core/dev.c
index 93243479085f..a4d39b87b4e5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1991,6 +1991,9 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
rcu_read_lock();
again:
list_for_each_entry_rcu(ptype, ptype_list, list) {
+ if (ptype->ignore_outgoing)
+ continue;
+
/* Never send packets back to the socket
* they originated from - MvS (miquels@drinkel.ow.org)
*/
@@ -3250,7 +3253,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de
while (skb) {
struct sk_buff *next = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
rc = xmit_one(skb, dev, txq, next != NULL);
if (unlikely(!dev_xmit_complete(rc))) {
skb->next = next;
@@ -3350,7 +3353,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
for (; skb != NULL; skb = next) {
next = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
/* in case skb wont be segmented, point to itself */
skb->prev = skb;
@@ -5314,8 +5317,7 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
list_for_each_entry_safe_reverse(skb, p, head, list) {
if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
return;
- list_del(&skb->list);
- skb->next = NULL;
+ skb_list_del_init(skb);
napi_gro_complete(skb);
napi->gro_hash[index].count--;
}
@@ -5500,8 +5502,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
if (pp) {
- list_del(&pp->list);
- pp->next = NULL;
+ skb_list_del_init(pp);
napi_gro_complete(pp);
napi->gro_hash[hash].count--;
}
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 6bc42933be4a..3a4b29a13d31 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1626,7 +1626,7 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
if (!ops->eswitch_mode_set)
return -EOPNOTSUPP;
mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
- err = ops->eswitch_mode_set(devlink, mode);
+ err = ops->eswitch_mode_set(devlink, mode, info->extack);
if (err)
return err;
}
@@ -1636,7 +1636,8 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
return -EOPNOTSUPP;
inline_mode = nla_get_u8(
info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
- err = ops->eswitch_inline_mode_set(devlink, inline_mode);
+ err = ops->eswitch_inline_mode_set(devlink, inline_mode,
+ info->extack);
if (err)
return err;
}
@@ -1645,7 +1646,8 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
if (!ops->eswitch_encap_mode_set)
return -EOPNOTSUPP;
encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
- err = ops->eswitch_encap_mode_set(devlink, encap_mode);
+ err = ops->eswitch_encap_mode_set(devlink, encap_mode,
+ info->extack);
if (err)
return err;
}
@@ -2675,6 +2677,21 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
.type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
+ .name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME,
+ .type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
+ .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME,
+ .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
+ .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
+ .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -3495,7 +3512,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
start_offset = *((u64 *)&cb->args[0]);
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize,
- attrs, DEVLINK_ATTR_MAX, ops->policy, NULL);
+ attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack);
if (err)
goto out;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0762aaf8e964..4cc603dfc9ef 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -27,6 +27,7 @@
#include <linux/rtnetlink.h>
#include <linux/sched/signal.h>
#include <linux/net.h>
+#include <net/xdp_sock.h>
/*
* Some useful ethtool_ops methods that're device independent.
@@ -539,47 +540,17 @@ struct ethtool_link_usettings {
} link_modes;
};
-/* Internal kernel helper to query a device ethtool_link_settings.
- *
- * Backward compatibility note: for compatibility with legacy drivers
- * that implement only the ethtool_cmd API, this has to work with both
- * drivers implementing get_link_ksettings API and drivers
- * implementing get_settings API. When drivers implement get_settings
- * and report ethtool_cmd deprecated fields
- * (transceiver/maxrxpkt/maxtxpkt), these fields are silently ignored
- * because the resulting struct ethtool_link_settings does not report them.
- */
+/* Internal kernel helper to query a device ethtool_link_settings. */
int __ethtool_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *link_ksettings)
{
- int err;
- struct ethtool_cmd cmd;
-
ASSERT_RTNL();
- if (dev->ethtool_ops->get_link_ksettings) {
- memset(link_ksettings, 0, sizeof(*link_ksettings));
- return dev->ethtool_ops->get_link_ksettings(dev,
- link_ksettings);
- }
-
- /* driver doesn't support %ethtool_link_ksettings API. revert to
- * legacy %ethtool_cmd API, unless it's not supported either.
- * TODO: remove when ethtool_ops::get_settings disappears internally
- */
- if (!dev->ethtool_ops->get_settings)
+ if (!dev->ethtool_ops->get_link_ksettings)
return -EOPNOTSUPP;
- memset(&cmd, 0, sizeof(cmd));
- cmd.cmd = ETHTOOL_GSET;
- err = dev->ethtool_ops->get_settings(dev, &cmd);
- if (err < 0)
- return err;
-
- /* we ignore deprecated fields transceiver/maxrxpkt/maxtxpkt
- */
- convert_legacy_settings_to_link_ksettings(link_ksettings, &cmd);
- return err;
+ memset(link_ksettings, 0, sizeof(*link_ksettings));
+ return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings);
}
EXPORT_SYMBOL(__ethtool_get_link_ksettings);
@@ -635,16 +606,7 @@ store_link_ksettings_for_user(void __user *to,
return 0;
}
-/* Query device for its ethtool_link_settings.
- *
- * Backward compatibility note: this function must fail when driver
- * does not implement ethtool::get_link_ksettings, even if legacy
- * ethtool_ops::get_settings is implemented. This tells new versions
- * of ethtool that they should use the legacy API %ETHTOOL_GSET for
- * this driver, so that they can correctly access the ethtool_cmd
- * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
- * implements ethtool_ops::get_settings anymore.
- */
+/* Query device for its ethtool_link_settings. */
static int ethtool_get_link_ksettings(struct net_device *dev,
void __user *useraddr)
{
@@ -652,7 +614,6 @@ static int ethtool_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings link_ksettings;
ASSERT_RTNL();
-
if (!dev->ethtool_ops->get_link_ksettings)
return -EOPNOTSUPP;
@@ -699,16 +660,7 @@ static int ethtool_get_link_ksettings(struct net_device *dev,
return store_link_ksettings_for_user(useraddr, &link_ksettings);
}
-/* Update device ethtool_link_settings.
- *
- * Backward compatibility note: this function must fail when driver
- * does not implement ethtool::set_link_ksettings, even if legacy
- * ethtool_ops::set_settings is implemented. This tells new versions
- * of ethtool that they should use the legacy API %ETHTOOL_SSET for
- * this driver, so that they can correctly update the ethtool_cmd
- * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
- * implements ethtool_ops::get_settings anymore.
- */
+/* Update device ethtool_link_settings. */
static int ethtool_set_link_ksettings(struct net_device *dev,
void __user *useraddr)
{
@@ -746,51 +698,31 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
/* Query device for its ethtool_cmd settings.
*
- * Backward compatibility note: for compatibility with legacy ethtool,
- * this has to work with both drivers implementing get_link_ksettings
- * API and drivers implementing get_settings API. When drivers
- * implement get_link_ksettings and report higher link mode bits, a
- * kernel warning is logged once (with name of 1st driver/device) to
- * recommend user to upgrade ethtool, but the command is successful
- * (only the lower link mode bits reported back to user).
+ * Backward compatibility note: for compatibility with legacy ethtool, this is
+ * now implemented via get_link_ksettings. When driver reports higher link mode
+ * bits, a kernel warning is logged once (with name of 1st driver/device) to
+ * recommend user to upgrade ethtool, but the command is successful (only the
+ * lower link mode bits reported back to user). Deprecated fields from
+ * ethtool_cmd (transceiver/maxrxpkt/maxtxpkt) are always set to zero.
*/
static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
{
+ struct ethtool_link_ksettings link_ksettings;
struct ethtool_cmd cmd;
+ int err;
ASSERT_RTNL();
+ if (!dev->ethtool_ops->get_link_ksettings)
+ return -EOPNOTSUPP;
- if (dev->ethtool_ops->get_link_ksettings) {
- /* First, use link_ksettings API if it is supported */
- int err;
- struct ethtool_link_ksettings link_ksettings;
-
- memset(&link_ksettings, 0, sizeof(link_ksettings));
- err = dev->ethtool_ops->get_link_ksettings(dev,
- &link_ksettings);
- if (err < 0)
- return err;
- convert_link_ksettings_to_legacy_settings(&cmd,
- &link_ksettings);
-
- /* send a sensible cmd tag back to user */
- cmd.cmd = ETHTOOL_GSET;
- } else {
- /* driver doesn't support %ethtool_link_ksettings
- * API. revert to legacy %ethtool_cmd API, unless it's
- * not supported either.
- */
- int err;
-
- if (!dev->ethtool_ops->get_settings)
- return -EOPNOTSUPP;
+ memset(&link_ksettings, 0, sizeof(link_ksettings));
+ err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings);
+ if (err < 0)
+ return err;
+ convert_link_ksettings_to_legacy_settings(&cmd, &link_ksettings);
- memset(&cmd, 0, sizeof(cmd));
- cmd.cmd = ETHTOOL_GSET;
- err = dev->ethtool_ops->get_settings(dev, &cmd);
- if (err < 0)
- return err;
- }
+ /* send a sensible cmd tag back to user */
+ cmd.cmd = ETHTOOL_GSET;
if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
return -EFAULT;
@@ -800,48 +732,29 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
/* Update device link settings with given ethtool_cmd.
*
- * Backward compatibility note: for compatibility with legacy ethtool,
- * this has to work with both drivers implementing set_link_ksettings
- * API and drivers implementing set_settings API. When drivers
- * implement set_link_ksettings and user's request updates deprecated
- * ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel
- * warning is logged once (with name of 1st driver/device) to
- * recommend user to upgrade ethtool, and the request is rejected.
+ * Backward compatibility note: for compatibility with legacy ethtool, this is
+ * now always implemented via set_link_ksettings. When user's request updates
+ * deprecated ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel
+ * warning is logged once (with name of 1st driver/device) to recommend user to
+ * upgrade ethtool, and the request is rejected.
*/
static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
{
+ struct ethtool_link_ksettings link_ksettings;
struct ethtool_cmd cmd;
ASSERT_RTNL();
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
-
- /* first, try new %ethtool_link_ksettings API. */
- if (dev->ethtool_ops->set_link_ksettings) {
- struct ethtool_link_ksettings link_ksettings;
-
- if (!convert_legacy_settings_to_link_ksettings(&link_ksettings,
- &cmd))
- return -EINVAL;
-
- link_ksettings.base.cmd = ETHTOOL_SLINKSETTINGS;
- link_ksettings.base.link_mode_masks_nwords
- = __ETHTOOL_LINK_MODE_MASK_NU32;
- return dev->ethtool_ops->set_link_ksettings(dev,
- &link_ksettings);
- }
-
- /* legacy %ethtool_cmd API */
-
- /* TODO: return -EOPNOTSUPP when ethtool_ops::get_settings
- * disappears internally
- */
-
- if (!dev->ethtool_ops->set_settings)
+ if (!dev->ethtool_ops->set_link_ksettings)
return -EOPNOTSUPP;
- return dev->ethtool_ops->set_settings(dev, &cmd);
+ if (!convert_legacy_settings_to_link_ksettings(&link_ksettings, &cmd))
+ return -EINVAL;
+ link_ksettings.base.link_mode_masks_nwords =
+ __ETHTOOL_LINK_MODE_MASK_NU32;
+ return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
}
static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
@@ -1750,8 +1663,10 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
void __user *useraddr)
{
- struct ethtool_channels channels, max = { .cmd = ETHTOOL_GCHANNELS };
+ struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS };
+ u16 from_channel, to_channel;
u32 max_rx_in_use = 0;
+ unsigned int i;
if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
return -EOPNOTSUPP;
@@ -1759,13 +1674,13 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
if (copy_from_user(&channels, useraddr, sizeof(channels)))
return -EFAULT;
- dev->ethtool_ops->get_channels(dev, &max);
+ dev->ethtool_ops->get_channels(dev, &curr);
/* ensure new counts are within the maximums */
- if ((channels.rx_count > max.max_rx) ||
- (channels.tx_count > max.max_tx) ||
- (channels.combined_count > max.max_combined) ||
- (channels.other_count > max.max_other))
+ if (channels.rx_count > curr.max_rx ||
+ channels.tx_count > curr.max_tx ||
+ channels.combined_count > curr.max_combined ||
+ channels.other_count > curr.max_other)
return -EINVAL;
/* ensure the new Rx count fits within the configured Rx flow
@@ -1775,6 +1690,14 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
(channels.combined_count + channels.rx_count) <= max_rx_in_use)
return -EINVAL;
+ /* Refuse to disable channels in use by zero-copy AF_XDP sockets */
+ from_channel = channels.combined_count +
+ min(channels.rx_count, channels.tx_count);
+ to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count);
+ for (i = from_channel; i < to_channel; i++)
+ if (xdp_get_umem_from_qid(dev, i))
+ return -EINVAL;
+
return dev->ethtool_ops->set_channels(dev, &channels);
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 0ff3953f64aa..ffbb827723a2 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1063,13 +1063,47 @@ skip:
return err;
}
+static int fib_valid_dumprule_req(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct fib_rule_hdr *frh;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for fib rule dump request");
+ return -EINVAL;
+ }
+
+ frh = nlmsg_data(nlh);
+ if (frh->dst_len || frh->src_len || frh->tos || frh->table ||
+ frh->res1 || frh->res2 || frh->action || frh->flags) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid values in header for fib rule dump request");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*frh))) {
+ NL_SET_ERR_MSG(extack, "Invalid data after header in fib rule dump request");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct fib_rules_ops *ops;
int idx = 0, family;
- family = rtnl_msg_family(cb->nlh);
+ if (cb->strict_check) {
+ int err = fib_valid_dumprule_req(nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
+ family = rtnl_msg_family(nlh);
if (family != AF_UNSPEC) {
/* Protocol specific dump request */
ops = lookup_rules_ops(net, family);
diff --git a/net/core/filter.c b/net/core/filter.c
index 5e00f2b85a56..4bbc6567fcb8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -58,13 +58,17 @@
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <net/xfrm.h>
+#include <net/udp.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include <linux/inetdevice.h>
+#include <net/inet_hashtables.h>
+#include <net/inet6_hashtables.h>
#include <net/ip_fib.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
+#include <net/net_namespace.h>
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>
@@ -3176,6 +3180,32 @@ static int __bpf_tx_xdp(struct net_device *dev,
return 0;
}
+static noinline int
+xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri)
+{
+ struct net_device *fwd;
+ u32 index = ri->ifindex;
+ int err;
+
+ fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ ri->ifindex = 0;
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
+ if (unlikely(err))
+ goto err;
+
+ _trace_xdp_redirect(dev, xdp_prog, index);
+ return 0;
+err:
+ _trace_xdp_redirect_err(dev, xdp_prog, index, err);
+ return err;
+}
+
static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
struct bpf_map *map,
struct xdp_buff *xdp,
@@ -3188,7 +3218,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
struct bpf_dtab_netdev *dst = fwd;
err = dev_map_enqueue(dst, xdp, dev_rx);
- if (err)
+ if (unlikely(err))
return err;
__dev_map_insert_ctx(map, index);
break;
@@ -3197,7 +3227,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
struct bpf_cpu_map_entry *rcpu = fwd;
err = cpu_map_enqueue(rcpu, xdp, dev_rx);
- if (err)
+ if (unlikely(err))
return err;
__cpu_map_insert_ctx(map, index);
break;
@@ -3238,7 +3268,7 @@ void xdp_do_flush_map(void)
}
EXPORT_SYMBOL_GPL(xdp_do_flush_map);
-static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
+static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
{
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP:
@@ -3270,9 +3300,9 @@ void bpf_clear_redirect_map(struct bpf_map *map)
}
static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog, struct bpf_map *map)
+ struct bpf_prog *xdp_prog, struct bpf_map *map,
+ struct bpf_redirect_info *ri)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
u32 index = ri->ifindex;
void *fwd = NULL;
int err;
@@ -3281,11 +3311,11 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
WRITE_ONCE(ri->map, NULL);
fwd = __xdp_map_lookup_elem(map, index);
- if (!fwd) {
+ if (unlikely(!fwd)) {
err = -EINVAL;
goto err;
}
- if (ri->map_to_flush && ri->map_to_flush != map)
+ if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
xdp_do_flush_map();
err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
@@ -3305,29 +3335,11 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = READ_ONCE(ri->map);
- struct net_device *fwd;
- u32 index = ri->ifindex;
- int err;
- if (map)
- return xdp_do_redirect_map(dev, xdp, xdp_prog, map);
+ if (likely(map))
+ return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri);
- fwd = dev_get_by_index_rcu(dev_net(dev), index);
- ri->ifindex = 0;
- if (unlikely(!fwd)) {
- err = -EINVAL;
- goto err;
- }
-
- err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
- if (unlikely(err))
- goto err;
-
- _trace_xdp_redirect(dev, xdp_prog, index);
- return 0;
-err:
- _trace_xdp_redirect_err(dev, xdp_prog, index, err);
- return err;
+ return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri);
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
@@ -4013,6 +4025,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
tp->snd_ssthresh = val;
}
break;
+ case TCP_SAVE_SYN:
+ if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ tp->save_syn = val;
+ break;
default:
ret = -EINVAL;
}
@@ -4042,17 +4060,29 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
if (!sk_fullsock(sk))
goto err_clear;
-
#ifdef CONFIG_INET
if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
- if (optname == TCP_CONGESTION) {
- struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_connection_sock *icsk;
+ struct tcp_sock *tp;
+
+ switch (optname) {
+ case TCP_CONGESTION:
+ icsk = inet_csk(sk);
if (!icsk->icsk_ca_ops || optlen <= 1)
goto err_clear;
strncpy(optval, icsk->icsk_ca_ops->name, optlen);
optval[optlen - 1] = 0;
- } else {
+ break;
+ case TCP_SAVED_SYN:
+ tp = tcp_sk(sk);
+
+ if (optlen <= 0 || !tp->saved_syn ||
+ optlen > tp->saved_syn[0])
+ goto err_clear;
+ memcpy(optval, tp->saved_syn + 1, optlen);
+ break;
+ default:
goto err_clear;
}
} else if (level == SOL_IP) {
@@ -4787,6 +4817,143 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
};
#endif /* CONFIG_IPV6_SEG6_BPF */
+#ifdef CONFIG_INET
+static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
+ struct sk_buff *skb, u8 family, u8 proto)
+{
+ int dif = skb->dev->ifindex;
+ bool refcounted = false;
+ struct sock *sk = NULL;
+
+ if (family == AF_INET) {
+ __be32 src4 = tuple->ipv4.saddr;
+ __be32 dst4 = tuple->ipv4.daddr;
+ int sdif = inet_sdif(skb);
+
+ if (proto == IPPROTO_TCP)
+ sk = __inet_lookup(net, &tcp_hashinfo, skb, 0,
+ src4, tuple->ipv4.sport,
+ dst4, tuple->ipv4.dport,
+ dif, sdif, &refcounted);
+ else
+ sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
+ dst4, tuple->ipv4.dport,
+ dif, sdif, &udp_table, skb);
+#if IS_REACHABLE(CONFIG_IPV6)
+ } else {
+ struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
+ struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
+ int sdif = inet6_sdif(skb);
+
+ if (proto == IPPROTO_TCP)
+ sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
+ src6, tuple->ipv6.sport,
+ dst6, tuple->ipv6.dport,
+ dif, sdif, &refcounted);
+ else
+ sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport,
+ dst6, tuple->ipv6.dport,
+ dif, sdif, &udp_table, skb);
+#endif
+ }
+
+ if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ sk = NULL;
+ }
+ return sk;
+}
+
+/* bpf_sk_lookup performs the core lookup for different types of sockets,
+ * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
+ * Returns the socket as an 'unsigned long' to simplify the casting in the
+ * callers to satisfy BPF_CALL declarations.
+ */
+static unsigned long
+bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ u8 proto, u64 netns_id, u64 flags)
+{
+ struct net *caller_net;
+ struct sock *sk = NULL;
+ u8 family = AF_UNSPEC;
+ struct net *net;
+
+ family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
+ if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags))
+ goto out;
+
+ if (skb->dev)
+ caller_net = dev_net(skb->dev);
+ else
+ caller_net = sock_net(skb->sk);
+ if (netns_id) {
+ net = get_net_ns_by_id(caller_net, netns_id);
+ if (unlikely(!net))
+ goto out;
+ sk = sk_lookup(net, tuple, skb, family, proto);
+ put_net(net);
+ } else {
+ net = caller_net;
+ sk = sk_lookup(net, tuple, skb, family, proto);
+ }
+
+ if (sk)
+ sk = sk_to_full_sk(sk);
+out:
+ return (unsigned long) sk;
+}
+
+BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
+ .func = bpf_sk_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
+ .func = bpf_sk_lookup_udp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_sk_release, struct sock *, sk)
+{
+ if (!sock_flag(sk, SOCK_RCU_FREE))
+ sock_gen_put(sk);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sk_release_proto = {
+ .func = bpf_sk_release,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_SOCKET,
+};
+#endif /* CONFIG_INET */
+
bool bpf_helper_changes_pkt_data(void *func)
{
if (func == bpf_skb_vlan_push ||
@@ -4993,6 +5160,14 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_ancestor_cgroup_id:
return &bpf_skb_ancestor_cgroup_id_proto;
#endif
+#ifdef CONFIG_INET
+ case BPF_FUNC_sk_lookup_tcp:
+ return &bpf_sk_lookup_tcp_proto;
+ case BPF_FUNC_sk_lookup_udp:
+ return &bpf_sk_lookup_udp_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+#endif
default:
return bpf_base_func_proto(func_id);
}
@@ -5093,6 +5268,25 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_redirect_hash_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
+#ifdef CONFIG_INET
+ case BPF_FUNC_sk_lookup_tcp:
+ return &bpf_sk_lookup_tcp_proto;
+ case BPF_FUNC_sk_lookup_udp:
+ return &bpf_sk_lookup_udp_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+#endif
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -5216,6 +5410,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != size_default)
return false;
break;
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
+ if (size != sizeof(struct bpf_flow_keys *))
+ return false;
+ break;
default:
/* Only narrow read access allowed for now. */
if (type == BPF_WRITE) {
@@ -5241,6 +5439,7 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -5266,6 +5465,7 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta):
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
return false;
}
@@ -5351,23 +5551,29 @@ static bool __sock_filter_check_size(int off, int size,
return size == size_default;
}
-static bool sock_filter_is_valid_access(int off, int size,
- enum bpf_access_type type,
- const struct bpf_prog *prog,
- struct bpf_insn_access_aux *info)
+bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= sizeof(struct bpf_sock))
return false;
if (off % size != 0)
return false;
- if (!__sock_filter_check_attach_type(off, type,
- prog->expected_attach_type))
- return false;
if (!__sock_filter_check_size(off, size, info))
return false;
return true;
}
+/* Access check for sock filter programs: first applies the generic
+ * struct bpf_sock bounds/alignment/size checks in bpf_sock_is_valid_access(),
+ * then the per-attach-type check using prog->expected_attach_type.
+ * Returns true only if both checks pass.
+ */
+static bool sock_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (!bpf_sock_is_valid_access(off, size, type, info))
+ return false;
+ return __sock_filter_check_attach_type(off, type,
+ prog->expected_attach_type);
+}
+
static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog, int drop_verdict)
{
@@ -5476,6 +5682,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -5677,6 +5884,7 @@ static bool sk_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
return false;
}
@@ -5736,6 +5944,39 @@ static bool sk_msg_is_valid_access(int off, int size,
return true;
}
+/* Context (struct __sk_buff) access check for flow dissector programs.
+ *
+ * Writes are accepted only inside cb[0]..cb[4]. For reads and permitted
+ * writes, data, data_end and flow_keys get a dedicated register type, while
+ * tc_classid, data_meta and the family..local_port range are rejected.
+ * Anything not rejected here is finally validated by
+ * bpf_skb_is_valid_access().
+ */
+static bool flow_dissector_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ /* The control block is the only writable part of the context. */
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ /* Tag pointer-valued fields and refuse fields not exposed here. */
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case bpf_ctx_range(struct __sk_buff, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
+ info->reg_type = PTR_TO_FLOW_KEYS;
+ break;
+ case bpf_ctx_range(struct __sk_buff, tc_classid):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
+ case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ return false;
+ }
+
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -6030,15 +6271,24 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(struct sock_common,
skc_num, 2, target_size));
break;
+
+ case offsetof(struct __sk_buff, flow_keys):
+ off = si->off;
+ off -= offsetof(struct __sk_buff, flow_keys);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct qdisc_skb_cb, flow_keys);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
+ break;
}
return insn - insn_buf;
}
-static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
- const struct bpf_insn *si,
- struct bpf_insn *insn_buf,
- struct bpf_prog *prog, u32 *target_size)
+u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
int off;
@@ -6950,7 +7200,7 @@ const struct bpf_prog_ops lwt_seg6local_prog_ops = {
const struct bpf_verifier_ops cg_sock_verifier_ops = {
.get_func_proto = sock_filter_func_proto,
.is_valid_access = sock_filter_is_valid_access,
- .convert_ctx_access = sock_filter_convert_ctx_access,
+ .convert_ctx_access = bpf_sock_convert_ctx_access,
};
const struct bpf_prog_ops cg_sock_prog_ops = {
@@ -6993,6 +7243,15 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = {
const struct bpf_prog_ops sk_msg_prog_ops = {
};
+/* Verifier callbacks for flow dissector programs: helper lookup and context
+ * access validation are specific to this program type, while context access
+ * rewriting reuses the generic skb conversion (bpf_convert_ctx_access).
+ */
+const struct bpf_verifier_ops flow_dissector_verifier_ops = {
+ .get_func_proto = flow_dissector_func_proto,
+ .is_valid_access = flow_dissector_is_valid_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
+};
+
+/* Program ops for flow dissector programs; no members are initialized. */
+const struct bpf_prog_ops flow_dissector_prog_ops = {
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index ce9eeeb7c024..676f3ad629f9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -25,6 +25,9 @@
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
#include <uapi/linux/batadv_packet.h>
+#include <linux/bpf.h>
+
+static DEFINE_MUTEX(flow_dissector_mutex);
static void dissector_set_key(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
@@ -62,6 +65,44 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
}
EXPORT_SYMBOL(skb_flow_dissector_init);
+/* Attach @prog as the flow dissector program of the calling task's network
+ * namespace (current->nsproxy->net_ns). @attr is not used here.
+ *
+ * Serialized by flow_dissector_mutex. Returns 0 on success, or -EEXIST if
+ * the namespace already has a program attached (the existing program is
+ * left in place).
+ */
+int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ struct bpf_prog *attached;
+ struct net *net;
+
+ net = current->nsproxy->net_ns;
+ mutex_lock(&flow_dissector_mutex);
+ attached = rcu_dereference_protected(net->flow_dissector_prog,
+ lockdep_is_held(&flow_dissector_mutex));
+ if (attached) {
+ /* Only one BPF program can be attached at a time */
+ mutex_unlock(&flow_dissector_mutex);
+ return -EEXIST;
+ }
+ /* Publish the program to RCU readers. */
+ rcu_assign_pointer(net->flow_dissector_prog, prog);
+ mutex_unlock(&flow_dissector_mutex);
+ return 0;
+}
+
+/* Detach the flow dissector program from the calling task's network
+ * namespace (current->nsproxy->net_ns). @attr is not used here.
+ *
+ * Serialized by flow_dissector_mutex. Returns 0 on success, or -ENOENT if
+ * no program is attached.
+ */
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+{
+ struct bpf_prog *attached;
+ struct net *net;
+
+ net = current->nsproxy->net_ns;
+ mutex_lock(&flow_dissector_mutex);
+ attached = rcu_dereference_protected(net->flow_dissector_prog,
+ lockdep_is_held(&flow_dissector_mutex));
+ if (!attached) {
+ mutex_unlock(&flow_dissector_mutex);
+ return -ENOENT;
+ }
+ /* Clear the published pointer before dropping the reference, so that
+ * no new RCU reader can pick up a program whose reference has already
+ * been released.
+ */
+ RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
+ bpf_prog_put(attached);
+ mutex_unlock(&flow_dissector_mutex);
+ return 0;
+}
/**
* skb_flow_get_be16 - extract be16 entity
* @skb: sk_buff to extract from
@@ -382,8 +423,8 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
offset += sizeof(struct gre_base_hdr);
if (hdr->flags & GRE_CSUM)
- offset += sizeof(((struct gre_full_hdr *) 0)->csum) +
- sizeof(((struct gre_full_hdr *) 0)->reserved1);
+ offset += FIELD_SIZEOF(struct gre_full_hdr, csum) +
+ FIELD_SIZEOF(struct gre_full_hdr, reserved1);
if (hdr->flags & GRE_KEY) {
const __be32 *keyid;
@@ -405,11 +446,11 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
else
key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
}
- offset += sizeof(((struct gre_full_hdr *) 0)->key);
+ offset += FIELD_SIZEOF(struct gre_full_hdr, key);
}
if (hdr->flags & GRE_SEQ)
- offset += sizeof(((struct pptp_gre_header *) 0)->seq);
+ offset += FIELD_SIZEOF(struct pptp_gre_header, seq);
if (gre_ver == 0) {
if (*p_proto == htons(ETH_P_TEB)) {
@@ -436,7 +477,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
u8 *ppp_hdr;
if (hdr->flags & GRE_ACK)
- offset += sizeof(((struct pptp_gre_header *) 0)->ack);
+ offset += FIELD_SIZEOF(struct pptp_gre_header, ack);
ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
sizeof(_ppp_hdr),
@@ -588,6 +629,60 @@ static bool skb_flow_dissect_allowed(int *num_hdrs)
return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
}
+/* Copy the result of a BPF flow dissector run (@flow_keys) into the keys of
+ * @flow_dissector stored in @target_container.
+ *
+ * The control and basic keys are written unconditionally; the IPv4/IPv6
+ * address keys and the ports key are written only when the dissector uses
+ * them. Fragment/encapsulation flags are OR-ed into the existing control
+ * flags, never cleared.
+ */
+static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct flow_dissector_key_control *key_control;
+ struct flow_dissector_key_basic *key_basic;
+ struct flow_dissector_key_addrs *key_addrs;
+ struct flow_dissector_key_ports *key_ports;
+
+ key_control = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ target_container);
+ key_control->thoff = flow_keys->thoff;
+ if (flow_keys->is_frag)
+ key_control->flags |= FLOW_DIS_IS_FRAGMENT;
+ if (flow_keys->is_first_frag)
+ key_control->flags |= FLOW_DIS_FIRST_FRAG;
+ if (flow_keys->is_encap)
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+
+ key_basic = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ target_container);
+ key_basic->n_proto = flow_keys->n_proto;
+ key_basic->ip_proto = flow_keys->ip_proto;
+
+ /* Addresses: addr_proto selects which family the program filled in. */
+ if (flow_keys->addr_proto == ETH_P_IP &&
+ dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ target_container);
+ key_addrs->v4addrs.src = flow_keys->ipv4_src;
+ key_addrs->v4addrs.dst = flow_keys->ipv4_dst;
+ key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ } else if (flow_keys->addr_proto == ETH_P_IPV6 &&
+ dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ target_container);
+ /* One copy starting at ipv6_src fills all of v6addrs.
+ * NOTE(review): assumes ipv6_dst directly follows ipv6_src in
+ * struct bpf_flow_keys, mirroring v6addrs - confirm.
+ */
+ memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src,
+ sizeof(key_addrs->v6addrs));
+ key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ }
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ target_container);
+ key_ports->src = flow_keys->sport;
+ key_ports->dst = flow_keys->dport;
+ }
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -619,6 +714,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_vlan *key_vlan;
enum flow_dissect_ret fdret;
enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
+ struct bpf_prog *attached = NULL;
int num_hdrs = 0;
u8 ip_proto = 0;
bool ret;
@@ -658,6 +754,50 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
+ rcu_read_lock();
+ if (skb) {
+ if (skb->dev)
+ attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog);
+ else if (skb->sk)
+ attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog);
+ else
+ WARN_ON_ONCE(1);
+ }
+ if (attached) {
+ /* Note that even though the const qualifier is discarded
+ * throughout the execution of the BPF program, all changes(the
+ * control block) are reverted after the BPF program returns.
+ * Therefore, __skb_flow_dissect does not alter the skb.
+ */
+ struct bpf_flow_keys flow_keys = {};
+ struct bpf_skb_data_end cb_saved;
+ struct bpf_skb_data_end *cb;
+ u32 result;
+
+ cb = (struct bpf_skb_data_end *)skb->cb;
+
+ /* Save Control Block */
+ memcpy(&cb_saved, cb, sizeof(cb_saved));
+ memset(cb, 0, sizeof(cb_saved));
+
+ /* Pass parameters to the BPF program */
+ cb->qdisc_cb.flow_keys = &flow_keys;
+ flow_keys.nhoff = nhoff;
+
+ bpf_compute_data_pointers((struct sk_buff *)skb);
+ result = BPF_PROG_RUN(attached, skb);
+
+ /* Restore state */
+ memcpy(cb, &cb_saved, sizeof(cb_saved));
+
+ __skb_flow_bpf_to_target(&flow_keys, flow_dissector,
+ target_container);
+ key_control->thoff = min_t(u16, key_control->thoff, skb->len);
+ rcu_read_unlock();
+ return result == BPF_OK;
+ }
+ rcu_read_unlock();
+
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 188d693cb251..9bf1b9ad1780 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -162,6 +162,34 @@ __gnet_stats_copy_basic(const seqcount_t *running,
}
EXPORT_SYMBOL(__gnet_stats_copy_basic);
+/* Common implementation behind gnet_stats_copy_basic() and
+ * gnet_stats_copy_basic_hw(): gathers the basic counters via
+ * __gnet_stats_copy_basic() and emits them as a TLV of the given @type.
+ *
+ * The legacy d->tc_stats byte/packet counters are updated only when
+ * compat_tc_stats is set and @type is TCA_STATS_BASIC. If d->tail is not
+ * set, nothing is emitted and 0 is returned; otherwise the result of
+ * gnet_stats_copy() is returned.
+ */
+static int
+___gnet_stats_copy_basic(const seqcount_t *running,
+ struct gnet_dump *d,
+ struct gnet_stats_basic_cpu __percpu *cpu,
+ struct gnet_stats_basic_packed *b,
+ int type)
+{
+ struct gnet_stats_basic_packed bstats = {0};
+
+ __gnet_stats_copy_basic(running, &bstats, cpu, b);
+
+ if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
+ d->tc_stats.bytes = bstats.bytes;
+ d->tc_stats.packets = bstats.packets;
+ }
+
+ if (d->tail) {
+ struct gnet_stats_basic sb;
+
+ /* Zero the whole struct before filling the two counters. */
+ memset(&sb, 0, sizeof(sb));
+ sb.bytes = bstats.bytes;
+ sb.packets = bstats.packets;
+ return gnet_stats_copy(d, type, &sb, sizeof(sb),
+ TCA_STATS_PAD);
+ }
+ return 0;
+}
+
/**
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
* @running: seqcount_t pointer
@@ -181,29 +209,36 @@ gnet_stats_copy_basic(const seqcount_t *running,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b)
{
- struct gnet_stats_basic_packed bstats = {0};
-
- __gnet_stats_copy_basic(running, &bstats, cpu, b);
-
- if (d->compat_tc_stats) {
- d->tc_stats.bytes = bstats.bytes;
- d->tc_stats.packets = bstats.packets;
- }
-
- if (d->tail) {
- struct gnet_stats_basic sb;
-
- memset(&sb, 0, sizeof(sb));
- sb.bytes = bstats.bytes;
- sb.packets = bstats.packets;
- return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb),
- TCA_STATS_PAD);
- }
- return 0;
+ return ___gnet_stats_copy_basic(running, d, cpu, b,
+ TCA_STATS_BASIC);
}
EXPORT_SYMBOL(gnet_stats_copy_basic);
/**
+ * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
+ * @running: seqcount_t pointer
+ * @d: dumping handle
+ * @cpu: copy statistic per cpu
+ * @b: basic statistics
+ *
+ * Appends the basic statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+ *
+ * Returns 0 on success or -1 with the statistic lock released
+ * if the room in the socket buffer was not sufficient.
+ */
+int
+gnet_stats_copy_basic_hw(const seqcount_t *running,
+ struct gnet_dump *d,
+ struct gnet_stats_basic_cpu __percpu *cpu,
+ struct gnet_stats_basic_packed *b)
+{
+ /* Same path as gnet_stats_copy_basic(), but emitted as
+ * TCA_STATS_BASIC_HW; for this type the shared helper does not
+ * update the legacy d->tc_stats counters.
+ */
+ return ___gnet_stats_copy_basic(running, d, cpu, b,
+ TCA_STATS_BASIC_HW);
+}
+EXPORT_SYMBOL(gnet_stats_copy_basic_hw);
+
+/**
* gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
* @d: dumping handle
* @rate_est: rate estimator
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index e38e641e98d5..7f51efb2b3ab 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -155,7 +155,7 @@ static void linkwatch_do_dev(struct net_device *dev)
clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
rfc2863_policy(dev);
- if (dev->flags & IFF_UP) {
+ if (dev->flags & IFF_UP && netif_device_present(dev)) {
if (netif_carrier_ok(dev))
dev_activate(dev);
else
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 91592fceeaad..69c41cb3966d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -232,7 +232,8 @@ static void pneigh_queue_purge(struct sk_buff_head *list)
}
}
-static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
+static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm)
{
int i;
struct neigh_hash_table *nht;
@@ -250,6 +251,10 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
np = &n->next;
continue;
}
+ if (skip_perm && n->nud_state & NUD_PERMANENT) {
+ np = &n->next;
+ continue;
+ }
rcu_assign_pointer(*np,
rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock)));
@@ -285,21 +290,35 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
write_lock_bh(&tbl->lock);
- neigh_flush_dev(tbl, dev);
+ neigh_flush_dev(tbl, dev, false);
write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
-int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm)
{
write_lock_bh(&tbl->lock);
- neigh_flush_dev(tbl, dev);
+ neigh_flush_dev(tbl, dev, skip_perm);
pneigh_ifdown_and_unlock(tbl, dev);
del_timer_sync(&tbl->proxy_timer);
pneigh_queue_purge(&tbl->proxy_queue);
return 0;
}
+
+/* Flush @dev's entries from @tbl like neigh_ifdown(), but with skip_perm
+ * set so that neigh_flush_dev() leaves NUD_PERMANENT entries in place.
+ * Always returns 0.
+ */
+int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
+{
+ __neigh_ifdown(tbl, dev, true);
+ return 0;
+}
+EXPORT_SYMBOL(neigh_carrier_down);
+
+/* Flush all of @dev's entries from @tbl, NUD_PERMANENT ones included
+ * (skip_perm is false). Always returns 0.
+ */
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+ __neigh_ifdown(tbl, dev, false);
+ return 0;
+}
EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
@@ -1280,11 +1299,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
neigh->arp_queue_len_bytes = 0;
}
out:
- if (update_isrouter) {
- neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
- (neigh->flags | NTF_ROUTER) :
- (neigh->flags & ~NTF_ROUTER);
- }
+ if (update_isrouter)
+ neigh_update_is_router(neigh, flags, &notify);
write_unlock_bh(&neigh->lock);
if (notify)
@@ -1712,7 +1728,8 @@ out:
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
+ int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
@@ -1787,12 +1804,16 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
}
if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
- flags &= ~NEIGH_UPDATE_F_OVERRIDE;
+ flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
}
if (ndm->ndm_flags & NTF_EXT_LEARNED)
flags |= NEIGH_UPDATE_F_EXT_LEARNED;
+ if (ndm->ndm_flags & NTF_ROUTER)
+ flags |= NEIGH_UPDATE_F_ISROUTER;
+
if (ndm->ndm_flags & NTF_USE) {
neigh_event_send(neigh, NULL);
err = 0;
@@ -2162,15 +2183,47 @@ errout:
return err;
}
+/* Strict validation of a neighbour table dump request.
+ *
+ * The message must carry a full struct ndtmsg header with both pad fields
+ * zero and no data after the header. Returns 0 if valid, otherwise -EINVAL
+ * with an explanatory message stored in @extack.
+ */
+static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct ndtmsg *ndtm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
+ return -EINVAL;
+ }
+
+ ndtm = nlmsg_data(nlh);
+ if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
+ return -EINVAL;
+ }
+
+ /* No attributes are accepted for this dump. */
+ if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
+ NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
int family, tidx, nidx = 0;
int tbl_skip = cb->args[0];
int neigh_skip = cb->args[1];
struct neigh_table *tbl;
- family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
+ if (cb->strict_check) {
+ int err = neightbl_valid_dump_info(nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
+ family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
struct neigh_parms *p;
@@ -2183,7 +2236,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
+ nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
NLM_F_MULTI) < 0)
break;
@@ -2198,7 +2251,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
if (neightbl_fill_param_info(skb, tbl, p,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNEIGHTBL,
NLM_F_MULTI) < 0)
goto out;
@@ -2327,35 +2380,24 @@ static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
return false;
}
+/* Filters for a neighbour dump, filled in by neigh_valid_dump_req() from the
+ * NDA_MASTER and NDA_IFINDEX request attributes. A non-zero value in either
+ * field makes the dump replies carry NLM_F_DUMP_FILTERED.
+ */
+struct neigh_dump_filter {
+ int master_idx;
+ int dev_idx;
+};
+
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct neigh_dump_filter *filter)
{
struct net *net = sock_net(skb->sk);
- const struct nlmsghdr *nlh = cb->nlh;
- struct nlattr *tb[NDA_MAX + 1];
struct neighbour *n;
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
struct neigh_hash_table *nht;
- int filter_master_idx = 0, filter_idx = 0;
unsigned int flags = NLM_F_MULTI;
- int err;
- err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
- if (!err) {
- if (tb[NDA_IFINDEX]) {
- if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
- return -EINVAL;
- filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
- }
- if (tb[NDA_MASTER]) {
- if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
- return -EINVAL;
- filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
- }
- if (filter_idx || filter_master_idx)
- flags |= NLM_F_DUMP_FILTERED;
- }
+ if (filter->dev_idx || filter->master_idx)
+ flags |= NLM_F_DUMP_FILTERED;
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
@@ -2368,8 +2410,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
n = rcu_dereference_bh(n->next)) {
if (idx < s_idx || !net_eq(dev_net(n->dev), net))
goto next;
- if (neigh_ifindex_filtered(n->dev, filter_idx) ||
- neigh_master_filtered(n->dev, filter_master_idx))
+ if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
+ neigh_master_filtered(n->dev, filter->master_idx))
goto next;
if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
@@ -2391,12 +2433,17 @@ out:
}
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct neigh_dump_filter *filter)
{
struct pneigh_entry *n;
struct net *net = sock_net(skb->sk);
int rc, h, s_h = cb->args[3];
int idx, s_idx = idx = cb->args[4];
+ unsigned int flags = NLM_F_MULTI;
+
+ if (filter->dev_idx || filter->master_idx)
+ flags |= NLM_F_DUMP_FILTERED;
read_lock_bh(&tbl->lock);
@@ -2406,10 +2453,12 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
if (idx < s_idx || pneigh_net(n) != net)
goto next;
+ if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
+ neigh_master_filtered(n->dev, filter->master_idx))
+ goto next;
if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- NLM_F_MULTI, tbl) < 0) {
+ RTM_NEWNEIGH, flags, tbl) < 0) {
read_unlock_bh(&tbl->lock);
rc = -1;
goto out;
@@ -2428,22 +2477,91 @@ out:
}
+/* Validate a neighbour dump request and extract its filters.
+ *
+ * With @strict_check the message must carry a full struct ndmsg header whose
+ * pad, ifindex, state and type fields are zero; the only flag allowed in
+ * ndm_flags is NTF_PROXY, which neigh_dump_info() uses to select a proxy
+ * dump. Attributes are then parsed strictly and any attribute other than
+ * NDA_IFINDEX and NDA_MASTER is rejected. Without @strict_check the header
+ * is not inspected and unknown attributes are ignored.
+ *
+ * On success returns 0 with @filter->dev_idx / @filter->master_idx set from
+ * the attributes that were present. On failure returns a negative errno,
+ * with a message in @extack where one is set here.
+ */
+static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
+ bool strict_check,
+ struct neigh_dump_filter *filter,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[NDA_MAX + 1];
+ int err, i;
+
+ if (strict_check) {
+ struct ndmsg *ndm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
+ return -EINVAL;
+ }
+
+ ndm = nlmsg_data(nlh);
+ if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
+ ndm->ndm_state || ndm->ndm_type) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
+ return -EINVAL;
+ }
+
+ /* NTF_PROXY requests a proxy dump and must stay usable with
+ * strict checking; every other flag bit is invalid here.
+ */
+ if (ndm->ndm_flags & ~NTF_PROXY) {
+ NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
+ return -EINVAL;
+ }
+
+ err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
+ NULL, extack);
+ } else {
+ err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
+ NULL, extack);
+ }
+ if (err < 0)
+ return err;
+
+ for (i = 0; i <= NDA_MAX; ++i) {
+ if (!tb[i])
+ continue;
+
+ /* all new attributes should require strict_check */
+ switch (i) {
+ case NDA_IFINDEX:
+ if (nla_len(tb[i]) != sizeof(u32)) {
+ NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in neighbor dump request");
+ return -EINVAL;
+ }
+ filter->dev_idx = nla_get_u32(tb[i]);
+ break;
+ case NDA_MASTER:
+ if (nla_len(tb[i]) != sizeof(u32)) {
+ NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in neighbor dump request");
+ return -EINVAL;
+ }
+ filter->master_idx = nla_get_u32(tb[i]);
+ break;
+ default:
+ if (strict_check) {
+ NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct neigh_dump_filter filter = {};
struct neigh_table *tbl;
int t, family, s_t;
int proxy = 0;
int err;
- family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
+ family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
/* check for full ndmsg structure presence, family member is
* the same for both structures
*/
- if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
- ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
+ if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
+ ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
proxy = 1;
+ err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
+ if (err < 0 && cb->strict_check)
+ return err;
+
s_t = cb->args[0];
for (t = 0; t < NEIGH_NR_TABLES; t++) {
@@ -2457,9 +2575,9 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
memset(&cb->args[1], 0, sizeof(cb->args) -
sizeof(cb->args[0]));
if (proxy)
- err = pneigh_dump_table(tbl, skb, cb);
+ err = pneigh_dump_table(tbl, skb, cb, &filter);
else
- err = neigh_dump_table(tbl, skb, cb);
+ err = neigh_dump_table(tbl, skb, cb, &filter);
if (err < 0)
break;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 670c84b1bfc2..fefe72774aeb 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -853,6 +853,12 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
.s_idx = cb->args[0],
};
+ if (cb->strict_check &&
+ nlmsg_attrlen(cb->nlh, sizeof(struct rtgenmsg))) {
+ NL_SET_ERR_MSG(cb->extack, "Unknown data in network namespace id dump request");
+ return -EINVAL;
+ }
+
spin_lock_bh(&net->nsid_lock);
idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
spin_unlock_bh(&net->nsid_lock);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7f6938405fa1..6ac919847ce6 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3426,7 +3426,7 @@ xmit_more:
net_info_ratelimited("%s xmit error: %d\n",
pkt_dev->odevname, ret);
pkt_dev->errors++;
- /* fallthru */
+ /* fall through */
case NETDEV_TX_BUSY:
/* Retry it next time */
refcount_dec(&(pkt_dev->skb->users));
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 37c7936124e6..0958c7be2c22 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -59,7 +59,7 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
-#define RTNL_MAX_TYPE 48
+#define RTNL_MAX_TYPE 49
#define RTNL_SLAVE_MAX_TYPE 36
struct rtnl_link {
@@ -130,6 +130,12 @@ int rtnl_is_locked(void)
}
EXPORT_SYMBOL(rtnl_is_locked);
+/* Thin wrapper that applies refcount_dec_and_mutex_lock() to @r with the
+ * file-private rtnl_mutex, and returns that function's result.
+ * NOTE(review): presumably returns true with RTNL held when the count
+ * drops to zero - confirm against refcount_dec_and_mutex_lock().
+ */
+bool refcount_dec_and_rtnl_lock(refcount_t *r)
+{
+ return refcount_dec_and_mutex_lock(r, &rtnl_mutex);
+}
+
#ifdef CONFIG_PROVE_LOCKING
bool lockdep_rtnl_is_held(void)
{
@@ -1016,7 +1022,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_NEW_NETNSID */
+ nla_total_size(4) /* IFLA_NEW_IFINDEX */
+ nla_total_size(1) /* IFLA_PROTO_DOWN */
- + nla_total_size(4) /* IFLA_IF_NETNSID */
+ + nla_total_size(4) /* IFLA_TARGET_NETNSID */
+ nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */
+ nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
+ nla_total_size(4) /* IFLA_MIN_MTU */
@@ -1598,7 +1604,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = change;
- if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid))
+ if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
goto nla_put_failure;
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
@@ -1737,7 +1743,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_XDP] = { .type = NLA_NESTED },
[IFLA_EVENT] = { .type = NLA_U32 },
[IFLA_GROUP] = { .type = NLA_U32 },
- [IFLA_IF_NETNSID] = { .type = NLA_S32 },
+ [IFLA_TARGET_NETNSID] = { .type = NLA_S32 },
[IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 },
[IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
[IFLA_MIN_MTU] = { .type = NLA_U32 },
@@ -1845,7 +1851,15 @@ static bool link_dump_filtered(struct net_device *dev,
return false;
}
-static struct net *get_target_net(struct sock *sk, int netnsid)
+/**
+ * rtnl_get_net_ns_capable - Get netns if sufficiently privileged.
+ * @sk: netlink socket
+ * @netnsid: network namespace identifier
+ *
+ * Returns the network namespace identified by netnsid on success or an error
+ * pointer on failure.
+ */
+struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid)
{
struct net *net;
@@ -1862,9 +1876,54 @@ static struct net *get_target_net(struct sock *sk, int netnsid)
}
return net;
}
+EXPORT_SYMBOL_GPL(rtnl_get_net_ns_capable);
+
+/* Validate a link dump request and parse its attributes into @tb
+ * (indexed up to IFLA_MAX, checked against ifla_policy).
+ *
+ * With @strict_check the message must carry a full struct ifinfomsg whose
+ * __ifi_pad, ifi_type, ifi_flags, ifi_change and ifi_index are all zero,
+ * and attributes are parsed with nlmsg_parse_strict(). Without it the
+ * header is not inspected and its length is guessed for compatibility with
+ * old userspace (see the comment in the body).
+ *
+ * Returns the parser's result, or -EINVAL with a message in @extack when a
+ * strict header check fails.
+ */
+static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,
+ bool strict_check, struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ int hdrlen;
+
+ if (strict_check) {
+ struct ifinfomsg *ifm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for link dump");
+ return -EINVAL;
+ }
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
+ ifm->ifi_change) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for link dump request");
+ return -EINVAL;
+ }
+ if (ifm->ifi_index) {
+ NL_SET_ERR_MSG(extack, "Filter by device index not supported for link dumps");
+ return -EINVAL;
+ }
+
+ return nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFLA_MAX,
+ ifla_policy, extack);
+ }
+
+ /* A hack to preserve kernel<->userspace interface.
+ * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
+ * However, before Linux v3.9 the code here assumed rtgenmsg and that's
+ * what iproute2 < v3.9.0 used.
+ * We can detect the old iproute2. Even including the IFLA_EXT_MASK
+ * attribute, its netlink message is shorter than struct ifinfomsg.
+ */
+ hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
+
+ return nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, extack);
+}
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct netlink_ext_ack *extack = cb->extack;
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct net *tgt_net = net;
int h, s_h;
@@ -1877,44 +1936,54 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
unsigned int flags = NLM_F_MULTI;
int master_idx = 0;
int netnsid = -1;
- int err;
- int hdrlen;
+ int err, i;
s_h = cb->args[0];
s_idx = cb->args[1];
- /* A hack to preserve kernel<->userspace interface.
- * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
- * However, before Linux v3.9 the code here assumed rtgenmsg and that's
- * what iproute2 < v3.9.0 used.
- * We can detect the old iproute2. Even including the IFLA_EXT_MASK
- * attribute, its netlink message is shorter than struct ifinfomsg.
- */
- hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
- sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
-
- if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
- ifla_policy, NULL) >= 0) {
- if (tb[IFLA_IF_NETNSID]) {
- netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(skb->sk, netnsid);
- if (IS_ERR(tgt_net))
- return PTR_ERR(tgt_net);
- }
-
- if (tb[IFLA_EXT_MASK])
- ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+ err = rtnl_valid_dump_ifinfo_req(nlh, cb->strict_check, tb, extack);
+ if (err < 0) {
+ if (cb->strict_check)
+ return err;
- if (tb[IFLA_MASTER])
- master_idx = nla_get_u32(tb[IFLA_MASTER]);
+ goto walk_entries;
+ }
- if (tb[IFLA_LINKINFO])
- kind_ops = linkinfo_to_kind_ops(tb[IFLA_LINKINFO]);
+ for (i = 0; i <= IFLA_MAX; ++i) {
+ if (!tb[i])
+ continue;
- if (master_idx || kind_ops)
- flags |= NLM_F_DUMP_FILTERED;
+ /* new attributes should only be added with strict checking */
+ switch (i) {
+ case IFLA_TARGET_NETNSID:
+ netnsid = nla_get_s32(tb[i]);
+ tgt_net = rtnl_get_net_ns_capable(skb->sk, netnsid);
+ if (IS_ERR(tgt_net)) {
+ NL_SET_ERR_MSG(extack, "Invalid target network namespace id");
+ return PTR_ERR(tgt_net);
+ }
+ break;
+ case IFLA_EXT_MASK:
+ ext_filter_mask = nla_get_u32(tb[i]);
+ break;
+ case IFLA_MASTER:
+ master_idx = nla_get_u32(tb[i]);
+ break;
+ case IFLA_LINKINFO:
+ kind_ops = linkinfo_to_kind_ops(tb[i]);
+ break;
+ default:
+ if (cb->strict_check) {
+ NL_SET_ERR_MSG(extack, "Unsupported attribute in link dump request");
+ return -EINVAL;
+ }
+ }
}
+ if (master_idx || kind_ops)
+ flags |= NLM_F_DUMP_FILTERED;
+
+walk_entries:
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &tgt_net->dev_index_head[h];
@@ -1926,8 +1995,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
err = rtnl_fill_ifinfo(skb, dev, net,
RTM_NEWLINK,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, 0,
- flags,
+ nlh->nlmsg_seq, 0, flags,
ext_filter_mask, 0, NULL, 0,
netnsid);
@@ -1982,7 +2050,7 @@ EXPORT_SYMBOL(rtnl_link_get_net);
*
* 1. IFLA_NET_NS_PID
* 2. IFLA_NET_NS_FD
- * 3. IFLA_IF_NETNSID
+ * 3. IFLA_TARGET_NETNSID
*/
static struct net *rtnl_link_get_net_by_nlattr(struct net *src_net,
struct nlattr *tb[])
@@ -1992,10 +2060,10 @@ static struct net *rtnl_link_get_net_by_nlattr(struct net *src_net,
if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD])
return rtnl_link_get_net(src_net, tb);
- if (!tb[IFLA_IF_NETNSID])
+ if (!tb[IFLA_TARGET_NETNSID])
return get_net(src_net);
- net = get_net_ns_by_id(src_net, nla_get_u32(tb[IFLA_IF_NETNSID]));
+ net = get_net_ns_by_id(src_net, nla_get_u32(tb[IFLA_TARGET_NETNSID]));
if (!net)
return ERR_PTR(-EINVAL);
@@ -2036,13 +2104,13 @@ static int rtnl_ensure_unique_netns(struct nlattr *tb[],
return -EOPNOTSUPP;
}
- if (tb[IFLA_IF_NETNSID] && (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]))
+ if (tb[IFLA_TARGET_NETNSID] && (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]))
goto invalid_attr;
- if (tb[IFLA_NET_NS_PID] && (tb[IFLA_IF_NETNSID] || tb[IFLA_NET_NS_FD]))
+ if (tb[IFLA_NET_NS_PID] && (tb[IFLA_TARGET_NETNSID] || tb[IFLA_NET_NS_FD]))
goto invalid_attr;
- if (tb[IFLA_NET_NS_FD] && (tb[IFLA_IF_NETNSID] || tb[IFLA_NET_NS_PID]))
+ if (tb[IFLA_NET_NS_FD] && (tb[IFLA_TARGET_NETNSID] || tb[IFLA_NET_NS_PID]))
goto invalid_attr;
return 0;
@@ -2318,7 +2386,7 @@ static int do_setlink(const struct sk_buff *skb,
if (err < 0)
return err;
- if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_IF_NETNSID]) {
+ if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) {
struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev),
tb, CAP_NET_ADMIN);
if (IS_ERR(net)) {
@@ -2761,9 +2829,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
- if (tb[IFLA_IF_NETNSID]) {
- netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
+ if (tb[IFLA_TARGET_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]);
+ tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid);
if (IS_ERR(tgt_net))
return PTR_ERR(tgt_net);
}
@@ -3177,9 +3245,9 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
- if (tb[IFLA_IF_NETNSID]) {
- netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
- tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
+ if (tb[IFLA_TARGET_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]);
+ tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid);
if (IS_ERR(tgt_net))
return PTR_ERR(tgt_net);
}
@@ -3264,13 +3332,13 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
int s_idx = cb->family;
+ int type = cb->nlh->nlmsg_type - RTM_BASE;
if (s_idx == 0)
s_idx = 1;
for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
struct rtnl_link **tab;
- int type = cb->nlh->nlmsg_type-RTM_BASE;
struct rtnl_link *link;
rtnl_dumpit_func dumpit;
@@ -3731,22 +3799,66 @@ out:
}
EXPORT_SYMBOL(ndo_dflt_fdb_dump);
-static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
+/* Strictly validate an fdb dump request and extract its filters.
+ *
+ * @nlh:        netlink header of the dump request
+ * @br_idx:     set to the value of NDA_MASTER when that attribute is
+ *              present, left untouched otherwise
+ * @brport_idx: set to ndm_ifindex from the header, then overridden by
+ *              NDA_IFINDEX when that attribute is present
+ * @extack:     extended ack used to report the reason for a rejection
+ *
+ * The request must carry a full struct ndmsg whose pad, state, flags and
+ * type fields are zero. Only NDA_IFINDEX and NDA_MASTER are accepted as
+ * attributes; anything else is rejected.
+ *
+ * Returns 0 on success, -EINVAL (or the parse error) on failure.
+ */
+static int valid_fdb_dump_strict(const struct nlmsghdr *nlh,
+				 int *br_idx, int *brport_idx,
+				 struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[NDA_MAX + 1];
+	struct ndmsg *ndm;
+	int err, i;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+		NL_SET_ERR_MSG(extack, "Invalid header for fdb dump request");
+		return -EINVAL;
+	}
+
+	ndm = nlmsg_data(nlh);
+	if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
+	    ndm->ndm_flags || ndm->ndm_type) {
+		NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request");
+		return -EINVAL;
+	}
+
+	/* No policy is passed to the parser, so attribute lengths are
+	 * checked by hand in the loop below.
+	 */
+	err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
+				 NULL, extack);
+	if (err < 0)
+		return err;
+
+	*brport_idx = ndm->ndm_ifindex;
+	for (i = 0; i <= NDA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case NDA_IFINDEX:
+			if (nla_len(tb[i]) != sizeof(u32)) {
+				NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in fdb dump request");
+				return -EINVAL;
+			}
+			*brport_idx = nla_get_u32(tb[NDA_IFINDEX]);
+			break;
+		case NDA_MASTER:
+			if (nla_len(tb[i]) != sizeof(u32)) {
+				NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in fdb dump request");
+				return -EINVAL;
+			}
+			*br_idx = nla_get_u32(tb[NDA_MASTER]);
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Unsupported attribute in fdb dump request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh,
+ int *br_idx, int *brport_idx,
+ struct netlink_ext_ack *extack)
{
- struct net_device *dev;
struct nlattr *tb[IFLA_MAX+1];
- struct net_device *br_dev = NULL;
- const struct net_device_ops *ops = NULL;
- const struct net_device_ops *cops = NULL;
- struct ifinfomsg *ifm = nlmsg_data(cb->nlh);
- struct net *net = sock_net(skb->sk);
- struct hlist_head *head;
- int brport_idx = 0;
- int br_idx = 0;
- int h, s_h;
- int idx = 0, s_idx;
- int err = 0;
- int fidx = 0;
+ int err;
/* A hack to preserve kernel<->userspace interface.
* Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0.
@@ -3755,20 +3867,49 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
* Fortunately these sizes don't conflict with the size of ifinfomsg
* with an optional attribute.
*/
- if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) &&
- (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) +
+ if (nlmsg_len(nlh) != sizeof(struct ndmsg) &&
+ (nlmsg_len(nlh) != sizeof(struct ndmsg) +
nla_attr_size(sizeof(u32)))) {
- err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,
- IFLA_MAX, ifla_policy, NULL);
+ struct ifinfomsg *ifm;
+
+ err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
+ ifla_policy, extack);
if (err < 0) {
return -EINVAL;
} else if (err == 0) {
if (tb[IFLA_MASTER])
- br_idx = nla_get_u32(tb[IFLA_MASTER]);
+ *br_idx = nla_get_u32(tb[IFLA_MASTER]);
}
- brport_idx = ifm->ifi_index;
+ ifm = nlmsg_data(nlh);
+ *brport_idx = ifm->ifi_index;
}
+ return 0;
+}
+
+static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net_device *dev;
+ struct net_device *br_dev = NULL;
+ const struct net_device_ops *ops = NULL;
+ const struct net_device_ops *cops = NULL;
+ struct net *net = sock_net(skb->sk);
+ struct hlist_head *head;
+ int brport_idx = 0;
+ int br_idx = 0;
+ int h, s_h;
+ int idx = 0, s_idx;
+ int err = 0;
+ int fidx = 0;
+
+ if (cb->strict_check)
+ err = valid_fdb_dump_strict(cb->nlh, &br_idx, &brport_idx,
+ cb->extack);
+ else
+ err = valid_fdb_dump_legacy(cb->nlh, &br_idx, &brport_idx,
+ cb->extack);
+ if (err < 0)
+ return err;
if (br_idx) {
br_dev = __dev_get_by_index(net, br_idx);
@@ -3953,28 +4094,72 @@ nla_put_failure:
}
EXPORT_SYMBOL_GPL(ndo_dflt_bridge_getlink);
+/* Validate a bridge link dump request and extract its filter mask.
+ *
+ * @nlh:          netlink header of the dump request
+ * @strict_check: true when the requester asked for strict validation
+ * @filter_mask:  set to the value of IFLA_EXT_MASK when that attribute
+ *                is present, left untouched otherwise
+ * @extack:       extended ack used to report the reason for a rejection
+ *
+ * With strict checking the ifinfomsg header must be complete with all
+ * inspected fields zero, and IFLA_EXT_MASK is the only attribute
+ * accepted. Without it, other attributes are silently ignored.
+ *
+ * Returns 0 on success, a negative errno otherwise.
+ */
+static int valid_bridge_getlink_req(const struct nlmsghdr *nlh,
+				    bool strict_check, u32 *filter_mask,
+				    struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_MAX+1];
+	int attr, err;
+
+	if (!strict_check) {
+		err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb,
+				  IFLA_MAX, ifla_policy, extack);
+	} else {
+		struct ifinfomsg *ifm;
+
+		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+			NL_SET_ERR_MSG(extack, "Invalid header for bridge link dump");
+			return -EINVAL;
+		}
+
+		ifm = nlmsg_data(nlh);
+		if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
+		    ifm->ifi_change || ifm->ifi_index) {
+			NL_SET_ERR_MSG(extack, "Invalid values in header for bridge link dump request");
+			return -EINVAL;
+		}
+
+		err = nlmsg_parse_strict(nlh, sizeof(struct ifinfomsg), tb,
+					 IFLA_MAX, ifla_policy, extack);
+	}
+	if (err < 0)
+		return err;
+
+	/* new attributes should only be added with strict checking */
+	for (attr = 0; attr <= IFLA_MAX; ++attr) {
+		if (!tb[attr])
+			continue;
+
+		if (attr == IFLA_EXT_MASK) {
+			*filter_mask = nla_get_u32(tb[attr]);
+			continue;
+		}
+
+		if (strict_check) {
+			NL_SET_ERR_MSG(extack, "Unsupported attribute in bridge link dump request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct net_device *dev;
int idx = 0;
u32 portid = NETLINK_CB(cb->skb).portid;
- u32 seq = cb->nlh->nlmsg_seq;
+ u32 seq = nlh->nlmsg_seq;
u32 filter_mask = 0;
int err;
- if (nlmsg_len(cb->nlh) > sizeof(struct ifinfomsg)) {
- struct nlattr *extfilt;
-
- extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg),
- IFLA_EXT_MASK);
- if (extfilt) {
- if (nla_len(extfilt) < sizeof(filter_mask))
- return -EINVAL;
-
- filter_mask = nla_get_u32(extfilt);
- }
- }
+ err = valid_bridge_getlink_req(nlh, cb->strict_check, &filter_mask,
+ cb->extack);
+ if (err < 0 && cb->strict_check)
+ return err;
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
@@ -4568,6 +4753,7 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh,
static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct netlink_ext_ack *extack = cb->extack;
int h, s_h, err, s_idx, s_idxattr, s_prividx;
struct net *net = sock_net(skb->sk);
unsigned int flags = NLM_F_MULTI;
@@ -4584,13 +4770,32 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->seq = net->dev_base_seq;
- if (nlmsg_len(cb->nlh) < sizeof(*ifsm))
+ if (nlmsg_len(cb->nlh) < sizeof(*ifsm)) {
+ NL_SET_ERR_MSG(extack, "Invalid header for stats dump");
return -EINVAL;
+ }
ifsm = nlmsg_data(cb->nlh);
+
+ /* only requests using strict checks can pass data to influence
+ * the dump. The legacy exception is filter_mask.
+ */
+ if (cb->strict_check) {
+ if (ifsm->pad1 || ifsm->pad2 || ifsm->ifindex) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request");
+ return -EINVAL;
+ }
+ if (nlmsg_attrlen(cb->nlh, sizeof(*ifsm))) {
+ NL_SET_ERR_MSG(extack, "Invalid attributes after stats header");
+ return -EINVAL;
+ }
+ }
+
filter_mask = ifsm->filter_mask;
- if (!filter_mask)
+ if (!filter_mask) {
+ NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump");
return -EINVAL;
+ }
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 428094b577fc..54b961de9538 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3381,64 +3381,6 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
}
EXPORT_SYMBOL(skb_find_text);
-/**
- * skb_append_datato_frags - append the user data to a skb
- * @sk: sock structure
- * @skb: skb structure to be appended with user data.
- * @getfrag: call back function to be used for getting the user data
- * @from: pointer to user message iov
- * @length: length of the iov message
- *
- * Description: This procedure append the user data in the fragment part
- * of the skb if any page alloc fails user this procedure returns -ENOMEM
- */
-int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
- int (*getfrag)(void *from, char *to, int offset,
- int len, int odd, struct sk_buff *skb),
- void *from, int length)
-{
- int frg_cnt = skb_shinfo(skb)->nr_frags;
- int copy;
- int offset = 0;
- int ret;
- struct page_frag *pfrag = &current->task_frag;
-
- do {
- /* Return error if we don't have space for new frag */
- if (frg_cnt >= MAX_SKB_FRAGS)
- return -EMSGSIZE;
-
- if (!sk_page_frag_refill(sk, pfrag))
- return -ENOMEM;
-
- /* copy the user data to page */
- copy = min_t(int, length, pfrag->size - pfrag->offset);
-
- ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
- offset, copy, 0, skb);
- if (ret < 0)
- return -EFAULT;
-
- /* copy was successful so update the size parameters */
- skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
- copy);
- frg_cnt++;
- pfrag->offset += copy;
- get_page(pfrag->page);
-
- skb->truesize += copy;
- refcount_add(copy, &sk->sk_wmem_alloc);
- skb->len += copy;
- skb->data_len += copy;
- offset += copy;
- length -= copy;
-
- } while (length > 0);
-
- return 0;
-}
-EXPORT_SYMBOL(skb_append_datato_frags);
-
int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
int offset, size_t size)
{
diff --git a/net/core/sock.c b/net/core/sock.c
index 3730eb855095..7e8796a6a089 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2317,7 +2317,7 @@ static void __lock_sock(struct sock *sk)
finish_wait(&sk->sk_lock.wq, &wait);
}
-static void __release_sock(struct sock *sk)
+void __release_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
{
@@ -2332,7 +2332,7 @@ static void __release_sock(struct sock *sk)
next = skb->next;
prefetch(next);
WARN_ON_ONCE(skb_dst_is_noref(skb));
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
sk_backlog_rcv(sk, skb);
cond_resched();
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 89b6785cef2a..4b2b194f4f1f 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -94,11 +94,21 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
kfree(xa);
}
-static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
struct xdp_mem_allocator *xa;
int id = xdp_rxq->mem.id;
+ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+ WARN(1, "Missing register, driver bug");
+ return;
+ }
+
+ if (xdp_rxq->mem.type != MEM_TYPE_PAGE_POOL &&
+ xdp_rxq->mem.type != MEM_TYPE_ZERO_COPY) {
+ return;
+ }
+
if (id == 0)
return;
@@ -110,6 +120,7 @@ static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
mutex_unlock(&mem_id_lock);
}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
@@ -119,7 +130,7 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
- __xdp_rxq_info_unreg_mem_model(xdp_rxq);
+ xdp_rxq_info_unreg_mem_model(xdp_rxq);
xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
xdp_rxq->dev = NULL;
@@ -398,3 +409,41 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);
+
+/* Copy the contents of @xdp into a newly allocated page laid out as a
+ * struct xdp_frame followed by the (optional) metadata and packet data.
+ *
+ * On success the original buffer is handed back via xdp_return_buff()
+ * and the new MEM_TYPE_PAGE_ORDER0 frame is returned. NULL is returned,
+ * and @xdp is left alone, when the frame header plus data would not fit
+ * in one page or when the page allocation fails.
+ */
+struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
+{
+	unsigned int meta_len = 0, data_len, copy_len;
+	struct xdp_frame *frame;
+	struct page *pg;
+	void *src, *dst;
+
+	/* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */
+	if (!xdp_data_meta_unsupported(xdp))
+		meta_len = xdp->data - xdp->data_meta;
+	data_len = xdp->data_end - xdp->data;
+	copy_len = data_len + meta_len;
+
+	if (sizeof(*frame) + copy_len > PAGE_SIZE)
+		return NULL;
+
+	pg = dev_alloc_page();
+	if (!pg)
+		return NULL;
+
+	/* The frame descriptor sits at the very start of the page ... */
+	frame = page_to_virt(pg);
+	memset(frame, 0, sizeof(*frame));
+
+	/* ... and the metadata (if any) plus payload directly behind it. */
+	dst = (void *)frame + sizeof(*frame);
+	if (meta_len)
+		src = xdp->data_meta;
+	else
+		src = xdp->data;
+	memcpy(dst, src, copy_len);
+
+	frame->data = dst + meta_len;
+	frame->len = data_len;
+	frame->headroom = 0;
+	frame->metasize = meta_len;
+	frame->mem.type = MEM_TYPE_PAGE_ORDER0;
+
+	xdp_return_buff(xdp);
+	return frame;
+}
+EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index bfd43e8f2c06..d0b3e69c6b39 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1363,7 +1363,7 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu"
" %04hu %03d %02x %-10s %-7s %-7s\n",
- dev->name ? dev->name : "???",
+ dev->name,
dn_type2asc(dn_db->parms.mode),
0, 0,
dn_db->t3, dn_db->parms.t3,
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 7f4534828f6c..a65d553e730d 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -29,6 +29,7 @@
#include <linux/keyctl.h>
#include <linux/err.h>
#include <linux/seq_file.h>
+#include <linux/dns_resolver.h>
#include <keys/dns_resolver-type.h>
#include <keys/user-type.h>
#include "internal.h"
@@ -48,27 +49,86 @@ const struct cred *dns_resolver_cache;
/*
* Preparse instantiation data for a dns_resolver key.
*
- * The data must be a NUL-terminated string, with the NUL char accounted in
- * datalen.
+ * For normal hostname lookups, the data must be a NUL-terminated string, with
+ * the NUL char accounted in datalen.
*
* If the data contains a '#' characters, then we take the clause after each
* one to be an option of the form 'key=value'. The actual data of interest is
* the string leading up to the first '#'. For instance:
*
* "ip1,ip2,...#foo=bar"
+ *
+ * For server list requests, the data must begin with a NUL char and be
+ * followed by a byte indicating the version of the data format. Version 1
+ * looks something like (note this is packed):
+ *
+ * u8 Non-string marker (i.e. 0)
+ * u8 Content (DNS_PAYLOAD_IS_*)
+ * u8 Version (e.g. 1)
+ * u8 Source of server list
+ * u8 Lookup status of server list
+ * u8 Number of servers
+ * foreach-server {
+ * __le16 Name length
+ * __le16 Priority (as per SRV record, low first)
+ * __le16 Weight (as per SRV record, higher first)
+ * __le16 Port
+ * u8 Source of address list
+ * u8 Lookup status of address list
+ * u8 Protocol (DNS_SERVER_PROTOCOL_*)
+ * u8 Number of addresses
+ * char[] Name (not NUL-terminated)
+ * foreach-address {
+ * u8 Family (DNS_ADDRESS_IS_*)
+ * union {
+ * u8[4] ipv4_addr
+ * u8[16] ipv6_addr
+ * }
+ * }
+ * }
+ *
*/
static int
dns_resolver_preparse(struct key_preparsed_payload *prep)
{
+ const struct dns_payload_header *bin;
struct user_key_payload *upayload;
unsigned long derrno;
int ret;
int datalen = prep->datalen, result_len = 0;
const char *data = prep->data, *end, *opt;
+ if (datalen <= 1 || !data)
+ return -EINVAL;
+
+ if (data[0] == 0) {
+ /* It may be a server list. */
+ if (datalen <= sizeof(*bin))
+ return -EINVAL;
+
+ bin = (const struct dns_payload_header *)data;
+ kenter("[%u,%u],%u", bin->content, bin->version, datalen);
+ if (bin->content != DNS_PAYLOAD_IS_SERVER_LIST) {
+ pr_warn_ratelimited(
+ "dns_resolver: Unsupported content type (%u)\n",
+ bin->content);
+ return -EINVAL;
+ }
+
+ if (bin->version != 1) {
+ pr_warn_ratelimited(
+ "dns_resolver: Unsupported server list version (%u)\n",
+ bin->version);
+ return -EINVAL;
+ }
+
+ result_len = datalen;
+ goto store_result;
+ }
+
kenter("'%*.*s',%u", datalen, datalen, data, datalen);
- if (datalen <= 1 || !data || data[datalen - 1] != '\0')
+ if (!data || data[datalen - 1] != '\0')
return -EINVAL;
datalen--;
@@ -144,6 +204,7 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
return 0;
}
+store_result:
kdebug("store result");
prep->quotalen = result_len;
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 49da67034f29..76338c38738a 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -148,12 +148,9 @@ int dns_query(const char *type, const char *name, size_t namelen,
if (_result) {
ret = -ENOMEM;
- *_result = kmalloc(len + 1, GFP_KERNEL);
+ *_result = kmemdup_nul(upayload->data, len, GFP_KERNEL);
if (!*_result)
goto put;
-
- memcpy(*_result, upayload->data, len);
- (*_result)[len] = '\0';
}
if (_expiry)
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 4183e4ba27a5..48c41918fb35 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -38,6 +38,9 @@ config NET_DSA_TAG_DSA
config NET_DSA_TAG_EDSA
bool
+config NET_DSA_TAG_GSWIP
+ bool
+
config NET_DSA_TAG_KSZ
bool
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 9e4d3536f977..6e721f7a2947 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -9,6 +9,7 @@ dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM_PREPEND) += tag_brcm.o
dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
+dsa_core-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o
dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o
dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 9f3209ff7ffd..a69c1790bbfc 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -52,6 +52,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
#ifdef CONFIG_NET_DSA_TAG_EDSA
[DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops,
#endif
+#ifdef CONFIG_NET_DSA_TAG_GSWIP
+ [DSA_TAG_PROTO_GSWIP] = &gswip_netdev_ops,
+#endif
#ifdef CONFIG_NET_DSA_TAG_KSZ
[DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops,
#endif
@@ -70,6 +73,52 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
[DSA_TAG_PROTO_NONE] = &none_ops,
};
+/* Map a tagger ops pointer to a human readable protocol name.
+ *
+ * @ops is looked up in dsa_device_ops[]; the name stored at the same
+ * index of the table below is returned. When @ops is not found, the name
+ * of DSA_TAG_PROTO_NONE ("none") is returned.
+ */
+const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops)
+{
+	/* Built once at compile time rather than on every call. */
+	static const char * const protocol_name[DSA_TAG_LAST] = {
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+		[DSA_TAG_PROTO_BRCM] = "brcm",
+#endif
+#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND
+		[DSA_TAG_PROTO_BRCM_PREPEND] = "brcm-prepend",
+#endif
+#ifdef CONFIG_NET_DSA_TAG_DSA
+		[DSA_TAG_PROTO_DSA] = "dsa",
+#endif
+#ifdef CONFIG_NET_DSA_TAG_EDSA
+		[DSA_TAG_PROTO_EDSA] = "edsa",
+#endif
+#ifdef CONFIG_NET_DSA_TAG_GSWIP
+		[DSA_TAG_PROTO_GSWIP] = "gswip",
+#endif
+#ifdef CONFIG_NET_DSA_TAG_KSZ
+		[DSA_TAG_PROTO_KSZ] = "ksz",
+#endif
+#ifdef CONFIG_NET_DSA_TAG_LAN9303
+		[DSA_TAG_PROTO_LAN9303] = "lan9303",
+#endif
+#ifdef CONFIG_NET_DSA_TAG_MTK
+		[DSA_TAG_PROTO_MTK] = "mtk",
+#endif
+#ifdef CONFIG_NET_DSA_TAG_QCA
+		[DSA_TAG_PROTO_QCA] = "qca",
+#endif
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+		[DSA_TAG_PROTO_TRAILER] = "trailer",
+#endif
+		[DSA_TAG_PROTO_NONE] = "none",
+	};
+	unsigned int i;
+
+	BUILD_BUG_ON(ARRAY_SIZE(protocol_name) != DSA_TAG_LAST);
+
+	for (i = 0; i < ARRAY_SIZE(dsa_device_ops); i++)
+		if (ops == dsa_device_ops[i])
+			return protocol_name[i];
+
+	return protocol_name[DSA_TAG_PROTO_NONE];
+}
+
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
{
const struct dsa_device_ops *ops;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 3964c6f7a7c0..9e4fd04ab53c 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -86,6 +86,7 @@ struct dsa_slave_priv {
/* dsa.c */
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
bool dsa_schedule_work(struct work_struct *work);
+const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
/* legacy.c */
#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
@@ -205,6 +206,9 @@ extern const struct dsa_device_ops dsa_netdev_ops;
/* tag_edsa.c */
extern const struct dsa_device_ops edsa_netdev_ops;
+/* tag_gswip.c */
+extern const struct dsa_device_ops gswip_netdev_ops;
+
/* tag_ksz.c */
extern const struct dsa_device_ops ksz_netdev_ops;
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 42a7b85b84e1..8aa92b09db76 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -392,8 +392,7 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
}
/* Drop our reference to the MDIO bus device */
- if (pd->chip[i].host_dev)
- put_device(pd->chip[i].host_dev);
+ put_device(pd->chip[i].host_dev);
}
kfree(pd->chip);
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1c45c1d6d241..3f840b6eea69 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1058,6 +1058,27 @@ static struct device_type dsa_type = {
.name = "dsa",
};
+/* Show handler of the read-only "tagging" device attribute: prints the
+ * name of the tagging protocol used by the CPU port this slave belongs
+ * to, followed by a newline.
+ */
+static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
+			    char *buf)
+{
+	struct dsa_port *port = dsa_slave_to_port(to_net_dev(d));
+	const char *proto = dsa_tag_protocol_to_str(port->cpu_dp->tag_ops);
+
+	return sprintf(buf, "%s\n", proto);
+}
+static DEVICE_ATTR_RO(tagging);
+
+static struct attribute *dsa_slave_attrs[] = { /* attributes published through dsa_group */
+ &dev_attr_tagging.attr, /* backed by tagging_show() */
+ NULL /* list terminator */
+};
+
+static const struct attribute_group dsa_group = { /* created on the slave device's kobject in dsa_slave_create(), removed in dsa_slave_destroy() */
+ .name = "dsa",
+ .attrs = dsa_slave_attrs, /* NULL-terminated attribute list */
+};
+
static void dsa_slave_phylink_validate(struct net_device *dev,
unsigned long *supported,
struct phylink_link_state *state)
@@ -1353,8 +1374,14 @@ int dsa_slave_create(struct dsa_port *port)
goto out_phy;
}
+ ret = sysfs_create_group(&slave_dev->dev.kobj, &dsa_group);
+ if (ret)
+ goto out_unreg;
+
return 0;
+out_unreg:
+ unregister_netdev(slave_dev);
out_phy:
rtnl_lock();
phylink_disconnect_phy(p->dp->pl);
@@ -1378,6 +1405,7 @@ void dsa_slave_destroy(struct net_device *slave_dev)
rtnl_unlock();
dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
+ sysfs_remove_group(&slave_dev->dev.kobj, &dsa_group);
unregister_netdev(slave_dev);
phylink_destroy(dp->pl);
free_percpu(p->stats64);
diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c
new file mode 100644
index 000000000000..49e9b73f1be3
--- /dev/null
+++ b/net/dsa/tag_gswip.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel / Lantiq GSWIP V2.0 PMAC tag support
+ *
+ * Copyright (C) 2017 - 2018 Hauke Mehrtens <hauke@hauke-m.de>
+ */
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <net/dsa.h>
+
+#include "dsa_priv.h"
+
+#define GSWIP_TX_HEADER_LEN 4
+
+/* special tag in TX path header */
+/* Byte 0 */
+#define GSWIP_TX_SLPID_SHIFT 0 /* source port ID */
+#define GSWIP_TX_SLPID_CPU 2
+#define GSWIP_TX_SLPID_APP1 3
+#define GSWIP_TX_SLPID_APP2 4
+#define GSWIP_TX_SLPID_APP3 5
+#define GSWIP_TX_SLPID_APP4 6
+#define GSWIP_TX_SLPID_APP5 7
+
+/* Byte 1 */
+#define GSWIP_TX_CRCGEN_DIS BIT(7)
+#define GSWIP_TX_DPID_SHIFT 0 /* destination group ID */
+#define GSWIP_TX_DPID_ELAN 0
+#define GSWIP_TX_DPID_EWAN 1
+#define GSWIP_TX_DPID_CPU 2
+#define GSWIP_TX_DPID_APP1 3
+#define GSWIP_TX_DPID_APP2 4
+#define GSWIP_TX_DPID_APP3 5
+#define GSWIP_TX_DPID_APP4 6
+#define GSWIP_TX_DPID_APP5 7
+
+/* Byte 2 */
+#define GSWIP_TX_PORT_MAP_EN BIT(7)
+#define GSWIP_TX_PORT_MAP_SEL BIT(6)
+#define GSWIP_TX_LRN_DIS BIT(5)
+#define GSWIP_TX_CLASS_EN BIT(4)
+#define GSWIP_TX_CLASS_SHIFT 0
+#define GSWIP_TX_CLASS_MASK GENMASK(3, 0)
+
+/* Byte 3 */
+#define GSWIP_TX_DPID_EN BIT(0)
+#define GSWIP_TX_PORT_MAP_SHIFT 1
+#define GSWIP_TX_PORT_MAP_MASK GENMASK(6, 1)
+
+#define GSWIP_RX_HEADER_LEN 8
+
+/* special tag in RX path header */
+/* Byte 7 */
+#define GSWIP_RX_SPPID_SHIFT 4
+#define GSWIP_RX_SPPID_MASK GENMASK(6, 4)
+
+/* Prepend the GSWIP_TX_HEADER_LEN byte special tag to @skb so that the
+ * frame is steered to the port behind @dev.
+ *
+ * Returns @skb with the tag in place, or NULL when no writable headroom
+ * for the tag could be obtained.
+ */
+static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb,
+				      struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	u8 *tag;
+
+	if (skb_cow_head(skb, GSWIP_TX_HEADER_LEN))
+		return NULL;
+
+	/* skb_push() hands back the new start of the data area. */
+	tag = skb_push(skb, GSWIP_TX_HEADER_LEN);
+
+	tag[0] = GSWIP_TX_SLPID_CPU;
+	tag[1] = GSWIP_TX_DPID_ELAN;
+	tag[2] = GSWIP_TX_PORT_MAP_EN | GSWIP_TX_PORT_MAP_SEL;
+	/* Byte 3: one-hot destination port map plus the DPID enable bit. */
+	tag[3] = (BIT(dp->index + GSWIP_TX_PORT_MAP_SHIFT) &
+		  GSWIP_TX_PORT_MAP_MASK) | GSWIP_TX_DPID_EN;
+
+	return skb;
+}
+
+/* Decode the GSWIP_RX_HEADER_LEN byte special tag of a received frame,
+ * point skb->dev at the slave device of the source port and strip the
+ * tag.
+ *
+ * Returns @skb on success, NULL when the tag cannot be pulled or no slave
+ * device exists for the reported source port.
+ */
+static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb,
+				     struct net_device *dev,
+				     struct packet_type *pt)
+{
+	const u8 *tag;
+	int src_port;
+
+	if (unlikely(!pskb_may_pull(skb, GSWIP_RX_HEADER_LEN)))
+		return NULL;
+
+	/* The tag is addressed ETH_HLEN bytes before skb->data
+	 * (NOTE(review): presumably because the Ethernet header has
+	 * already been pulled at this point -- confirm).
+	 */
+	tag = skb->data - ETH_HLEN;
+
+	/* Get source port information */
+	src_port = (tag[7] & GSWIP_RX_SPPID_MASK) >> GSWIP_RX_SPPID_SHIFT;
+
+	skb->dev = dsa_master_find_slave(dev, 0, src_port);
+	if (skb->dev) {
+		/* remove GSWIP tag */
+		skb_pull_rcsum(skb, GSWIP_RX_HEADER_LEN);
+		return skb;
+	}
+
+	return NULL;
+}
+
+const struct dsa_device_ops gswip_netdev_ops = { /* GSWIP tagger hooks, referenced from dsa_device_ops[DSA_TAG_PROTO_GSWIP] */
+ .xmit = gswip_tag_xmit, /* prepends the GSWIP_TX_HEADER_LEN byte TX tag */
+ .rcv = gswip_tag_rcv, /* resolves the source port and strips the GSWIP_RX_HEADER_LEN byte RX tag */
+};
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index e7857a8ac86d..d14226ecfde4 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -260,7 +260,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
}
sub_frag_mem_limit(fq->q.net, sum_truesize);
- head->next = NULL;
+ skb_mark_not_on_list(head);
head->dev = ldev;
head->tstamp = fq->q.stamp;
@@ -463,7 +463,6 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
table[0].data = &ieee802154_lowpan->frags.high_thresh;
table[0].extra1 = &ieee802154_lowpan->frags.low_thresh;
- table[0].extra2 = &init_net.ieee802154_lowpan.frags.high_thresh;
table[1].data = &ieee802154_lowpan->frags.low_thresh;
table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
table[2].data = &ieee802154_lowpan->frags.timeout;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 4dd95cdd8070..c01fa791260d 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -461,9 +461,9 @@ static int ah4_err(struct sk_buff *skb, u32 info)
return 0;
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
- ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
+ ipv4_update_pmtu(skb, net, info, 0, IPPROTO_AH);
else
- ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
+ ipv4_redirect(skb, net, 0, IPPROTO_AH);
xfrm_state_put(x);
return 0;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index e90c89ef8c08..850a6f13a082 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1255,6 +1255,8 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
change_info = ptr;
if (change_info->flags_changed & IFF_NOARP)
neigh_changeaddr(&arp_tbl, dev);
+ if (!netif_carrier_ok(dev))
+ neigh_carrier_down(&arp_tbl, dev);
break;
default:
break;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 82178cc69c96..777fa3b7fb13 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1512,7 +1512,7 @@ static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def,
*
* Description:
* Parse the packet's IP header looking for a CIPSO option. Returns a pointer
- * to the start of the CIPSO option on success, NULL if one if not found.
+ * to the start of the CIPSO option on success, NULL if one is not found.
*
*/
unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
@@ -1522,10 +1522,8 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
int optlen;
int taglen;
- for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) {
+ for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 1; ) {
switch (optptr[0]) {
- case IPOPT_CIPSO:
- return optptr;
case IPOPT_END:
return NULL;
case IPOPT_NOOP:
@@ -1534,6 +1532,11 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
default:
taglen = optptr[1];
}
+ if (!taglen || taglen > optlen)
+ return NULL;
+ if (optptr[0] == IPOPT_CIPSO)
+ return optptr;
+
optlen -= taglen;
optptr += taglen;
}
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index f915abff1350..300921417f89 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -42,7 +42,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
oif = sk->sk_bound_dev_if;
saddr = inet->inet_saddr;
if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
- if (!oif)
+ if (!oif || netif_index_is_l3_master(sock_net(sk), oif))
oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ea4bd8a52422..d122ebbe5980 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -100,6 +100,15 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
[IFA_FLAGS] = { .type = NLA_U32 },
[IFA_RT_PRIORITY] = { .type = NLA_U32 },
+ [IFA_TARGET_NETNSID] = { .type = NLA_S32 },
+};
+
+struct inet_fill_args {
+ u32 portid;
+ u32 seq;
+ int event;
+ unsigned int flags;
+ int netnsid;
};
#define IN4_ADDR_HSIZE_SHIFT 8
@@ -773,7 +782,8 @@ static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
}
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
- __u32 *pvalid_lft, __u32 *pprefered_lft)
+ __u32 *pvalid_lft, __u32 *pprefered_lft,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[IFA_MAX+1];
struct in_ifaddr *ifa;
@@ -783,7 +793,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
- NULL);
+ extack);
if (err < 0)
goto errout;
@@ -888,7 +898,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
ASSERT_RTNL();
- ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
+ ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
if (IS_ERR(ifa))
return PTR_ERR(ifa);
@@ -1584,13 +1594,14 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
}
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
- u32 portid, u32 seq, int event, unsigned int flags)
+ struct inet_fill_args *args)
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
u32 preferred, valid;
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
+ nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
+ args->flags);
if (!nlh)
return -EMSGSIZE;
@@ -1601,6 +1612,10 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
ifm->ifa_scope = ifa->ifa_scope;
ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
+ if (args->netnsid >= 0 &&
+ nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+ goto nla_put_failure;
+
if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
preferred = ifa->ifa_preferred_lft;
valid = ifa->ifa_valid_lft;
@@ -1645,9 +1660,71 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
+ struct inet_fill_args *fillargs,
+ struct net **tgt_net, struct sock *sk,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFA_MAX+1];
+ struct ifaddrmsg *ifm;
+ int err, i;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
+ return -EINVAL;
+ }
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
+ return -EINVAL;
+ }
+ if (ifm->ifa_index) {
+ NL_SET_ERR_MSG(extack, "ipv4: Filter by device index not supported for address dump");
+ return -EINVAL;
+ }
+
+ err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv4_policy, extack);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i <= IFA_MAX; ++i) {
+ if (!tb[i])
+ continue;
+
+ if (i == IFA_TARGET_NETNSID) {
+ struct net *net;
+
+ fillargs->netnsid = nla_get_s32(tb[i]);
+
+ net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
+ if (IS_ERR(net)) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
+ return PTR_ERR(net);
+ }
+ *tgt_net = net;
+ } else {
+ NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct inet_fill_args fillargs = {
+ .portid = NETLINK_CB(cb->skb).portid,
+ .seq = nlh->nlmsg_seq,
+ .event = RTM_NEWADDR,
+ .flags = NLM_F_MULTI,
+ .netnsid = -1,
+ };
struct net *net = sock_net(skb->sk);
+ struct net *tgt_net = net;
int h, s_h;
int idx, s_idx;
int ip_idx, s_ip_idx;
@@ -1660,12 +1737,21 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = idx = cb->args[1];
s_ip_idx = ip_idx = cb->args[2];
+ if (cb->strict_check) {
+ int err;
+
+ err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
+ skb->sk, cb->extack);
+ if (err < 0)
+ return err;
+ }
+
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
- head = &net->dev_index_head[h];
+ head = &tgt_net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
- net->dev_base_seq;
+ cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
+ tgt_net->dev_base_seq;
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -1679,10 +1765,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
ifa = ifa->ifa_next, ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
- if (inet_fill_ifaddr(skb, ifa,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWADDR, NLM_F_MULTI) < 0) {
+ if (inet_fill_ifaddr(skb, ifa, &fillargs) < 0) {
rcu_read_unlock();
goto done;
}
@@ -1698,6 +1781,8 @@ done:
cb->args[0] = h;
cb->args[1] = idx;
cb->args[2] = ip_idx;
+ if (fillargs.netnsid >= 0)
+ put_net(tgt_net);
return skb->len;
}
@@ -1705,8 +1790,14 @@ done:
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
u32 portid)
{
+ struct inet_fill_args fillargs = {
+ .portid = portid,
+ .seq = nlh ? nlh->nlmsg_seq : 0,
+ .event = event,
+ .flags = 0,
+ .netnsid = -1,
+ };
struct sk_buff *skb;
- u32 seq = nlh ? nlh->nlmsg_seq : 0;
int err = -ENOBUFS;
struct net *net;
@@ -1715,7 +1806,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
if (!skb)
goto errout;
- err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
+ err = inet_fill_ifaddr(skb, ifa, &fillargs);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -1995,6 +2086,7 @@ errout:
static int inet_netconf_dump_devconf(struct sk_buff *skb,
struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
int h, s_h;
int idx, s_idx;
@@ -2002,6 +2094,21 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
struct in_device *in_dev;
struct hlist_head *head;
+ if (cb->strict_check) {
+ struct netlink_ext_ack *extack = cb->extack;
+ struct netconfmsg *ncm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
+ return -EINVAL;
+ }
+ }
+
s_h = cb->args[0];
s_idx = idx = cb->args[1];
@@ -2021,7 +2128,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
if (inet_netconf_fill_devconf(skb, dev->ifindex,
&in_dev->cnf,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNETCONF,
NLM_F_MULTI,
NETCONFA_ALL) < 0) {
@@ -2038,7 +2145,7 @@ cont:
if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNETCONF, NLM_F_MULTI,
NETCONFA_ALL) < 0)
goto done;
@@ -2049,7 +2156,7 @@ cont:
if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
net->ipv4.devconf_dflt,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNETCONF, NLM_F_MULTI,
NETCONFA_ALL) < 0)
goto done;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 97689012b357..9e1c840596c5 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -683,12 +683,11 @@ static void esp_input_done_esn(struct crypto_async_request *base, int err)
*/
static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct ip_esp_hdr *esph;
struct crypto_aead *aead = x->data;
struct aead_request *req;
struct sk_buff *trailer;
int ivlen = crypto_aead_ivsize(aead);
- int elen = skb->len - sizeof(*esph) - ivlen;
+ int elen = skb->len - sizeof(struct ip_esp_hdr) - ivlen;
int nfrags;
int assoclen;
int seqhilen;
@@ -698,13 +697,13 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
struct scatterlist *sg;
int err = -EINVAL;
- if (!pskb_may_pull(skb, sizeof(*esph) + ivlen))
+ if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + ivlen))
goto out;
if (elen <= 0)
goto out;
- assoclen = sizeof(*esph);
+ assoclen = sizeof(struct ip_esp_hdr);
seqhilen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
@@ -820,9 +819,9 @@ static int esp4_err(struct sk_buff *skb, u32 info)
return 0;
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
- ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
+ ipv4_update_pmtu(skb, net, info, 0, IPPROTO_ESP);
else
- ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
+ ipv4_redirect(skb, net, 0, IPPROTO_ESP);
xfrm_state_put(x);
return 0;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0113993e9b2c..0f1beceb47d5 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -315,6 +315,32 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
return inet_select_addr(dev, ip_hdr(skb)->saddr, scope);
}
+bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
+{
+ bool dev_match = false;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ int ret;
+
+ for (ret = 0; ret < fi->fib_nhs; ret++) {
+ struct fib_nh *nh = &fi->fib_nh[ret];
+
+ if (nh->nh_dev == dev) {
+ dev_match = true;
+ break;
+ } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
+ dev_match = true;
+ break;
+ }
+ }
+#else
+ if (fi->fib_nh[0].nh_dev == dev)
+ dev_match = true;
+#endif
+
+ return dev_match;
+}
+EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev);
+
/* Given (packet source, input interface) and optional (dst, oif, tos):
* - (main) check, that source is valid i.e. not broadcast or our local
* address.
@@ -361,24 +387,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
(res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
goto e_inval;
fib_combine_itag(itag, &res);
- dev_match = false;
-
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- for (ret = 0; ret < res.fi->fib_nhs; ret++) {
- struct fib_nh *nh = &res.fi->fib_nh[ret];
- if (nh->nh_dev == dev) {
- dev_match = true;
- break;
- } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
- dev_match = true;
- break;
- }
- }
-#else
- if (FIB_RES_DEV(res) == dev)
- dev_match = true;
-#endif
+ dev_match = fib_info_nh_uses_dev(res.fi, dev);
if (dev_match) {
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
return ret;
@@ -792,8 +802,40 @@ errout:
return err;
}
+int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct rtmsg *rtm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
+ return -EINVAL;
+ }
+
+ rtm = nlmsg_data(nlh);
+ if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
+ rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
+ rtm->rtm_type) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
+ return -EINVAL;
+ }
+ if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) {
+ NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*rtm))) {
+ NL_SET_ERR_MSG(extack, "Invalid data after header in FIB dump request");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req);
+
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
@@ -801,8 +843,14 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
struct hlist_head *head;
int dumped = 0, err;
- if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
- ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
+ if (cb->strict_check) {
+ err = ip_valid_fib_dump_req(nlh, cb->extack);
+ if (err < 0)
+ return err;
+ }
+
+ if (nlmsg_len(nlh) >= sizeof(struct rtmsg) &&
+ ((struct rtmsg *)nlmsg_data(nlh))->rtm_flags & RTM_F_CLONED)
return skb->len;
s_h = cb->args[0];
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 446204ca7406..b5c3937ca6ec 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -208,7 +208,6 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
static void free_fib_info_rcu(struct rcu_head *head)
{
struct fib_info *fi = container_of(head, struct fib_info, rcu);
- struct dst_metrics *m;
change_nexthops(fi) {
if (nexthop_nh->nh_dev)
@@ -219,9 +218,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi);
- m = fi->fib_metrics;
- if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
- kfree(m);
+ ip_fib_metrics_put(fi->fib_metrics);
+
kfree(fi);
}
@@ -797,8 +795,10 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
return -EINVAL;
}
dev = __dev_get_by_index(net, nh->nh_oif);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
return -ENODEV;
+ }
if (!(dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack,
"Nexthop device is not up");
@@ -1018,13 +1018,6 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
return true;
}
-static int
-fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
-{
- return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len,
- fi->fib_metrics->metrics);
-}
-
struct fib_info *fib_create_info(struct fib_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -1082,16 +1075,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (!fi)
goto failure;
- if (cfg->fc_mx) {
- fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
- if (unlikely(!fi->fib_metrics)) {
- kfree(fi);
- return ERR_PTR(err);
- }
- refcount_set(&fi->fib_metrics->refcnt, 1);
- } else {
- fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
+ fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx,
+ cfg->fc_mx_len);
+ if (unlikely(IS_ERR(fi->fib_metrics))) {
+ err = PTR_ERR(fi->fib_metrics);
+ kfree(fi);
+ return ERR_PTR(err);
}
+
fib_info_cnt++;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
@@ -1110,10 +1101,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto failure;
} endfor_nexthops(fi)
- err = fib_convert_metrics(fi, cfg);
- if (err)
- goto failure;
-
if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index b798862b6be5..7efe740c06eb 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -86,13 +86,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
options = (__be32 *)(greh + 1);
if (greh->flags & GRE_CSUM) {
- if (skb_checksum_simple_validate(skb)) {
+ if (!skb_checksum_simple_validate(skb)) {
+ skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
+ null_compute_pseudo);
+ } else if (csum_err) {
*csum_err = true;
return -EINVAL;
}
- skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
- null_compute_pseudo);
options++;
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 695979b7ef6d..d832beed6e3a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1098,9 +1098,9 @@ void icmp_err(struct sk_buff *skb, u32 info)
}
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
- ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ICMP, 0);
+ ipv4_update_pmtu(skb, net, info, 0, IPPROTO_ICMP);
else if (type == ICMP_REDIRECT)
- ipv4_redirect(skb, net, 0, 0, IPPROTO_ICMP, 0);
+ ipv4_redirect(skb, net, 0, IPPROTO_ICMP);
}
/*
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e7227128df2c..9b0158fa431f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -260,8 +260,7 @@ out:
spin_unlock(&qp->q.lock);
out_rcu_unlock:
rcu_read_unlock();
- if (head)
- kfree_skb(head);
+ kfree_skb(head);
ipq_put(qp);
}
@@ -382,7 +381,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
*/
if (end < qp->q.len ||
((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
- goto err;
+ goto discard_qp;
qp->q.flags |= INET_FRAG_LAST_IN;
qp->q.len = end;
} else {
@@ -394,20 +393,20 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (end > qp->q.len) {
/* Some bits beyond end -> corruption. */
if (qp->q.flags & INET_FRAG_LAST_IN)
- goto err;
+ goto discard_qp;
qp->q.len = end;
}
}
if (end == offset)
- goto err;
+ goto discard_qp;
err = -ENOMEM;
if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
- goto err;
+ goto discard_qp;
err = pskb_trim_rcsum(skb, end - offset);
if (err)
- goto err;
+ goto discard_qp;
/* Note : skb->rbnode and skb->dev share the same location. */
dev = skb->dev;
@@ -423,6 +422,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* We do the same here for IPv4 (and increment an snmp counter).
*/
+ err = -EINVAL;
/* Find out where to put this fragment. */
prev_tail = qp->q.fragments_tail;
if (!prev_tail)
@@ -431,7 +431,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
/* This is the common case: skb goes to the end. */
/* Detect and discard overlaps. */
if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
- goto discard_qp;
+ goto overlap;
if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
ip4_frag_append_to_last_run(&qp->q, skb);
else
@@ -450,7 +450,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
FRAG_CB(skb1)->frag_run_len)
rbn = &parent->rb_right;
else /* Found an overlap with skb1. */
- goto discard_qp;
+ goto overlap;
} while (*rbn);
/* Here we have parent properly set, and rbn pointing to
* one of its NULL left/right children. Insert skb.
@@ -487,16 +487,18 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
skb->_skb_refdst = 0UL;
err = ip_frag_reasm(qp, skb, prev_tail, dev);
skb->_skb_refdst = orefdst;
+ if (err)
+ inet_frag_kill(&qp->q);
return err;
}
skb_dst_drop(skb);
return -EINPROGRESS;
+overlap:
+ __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
discard_qp:
inet_frag_kill(&qp->q);
- err = -EINVAL;
- __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
err:
kfree_skb(skb);
return err;
@@ -621,7 +623,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
sub_frag_mem_limit(qp->q.net, head->truesize);
*nextp = NULL;
- head->next = NULL;
+ skb_mark_not_on_list(head);
head->prev = NULL;
head->dev = dev;
head->tstamp = qp->q.stamp;
@@ -820,7 +822,6 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
table[0].data = &net->ipv4.frags.high_thresh;
table[0].extra1 = &net->ipv4.frags.low_thresh;
- table[0].extra2 = &init_net.ipv4.frags.high_thresh;
table[1].data = &net->ipv4.frags.low_thresh;
table[1].extra2 = &net->ipv4.frags.high_thresh;
table[2].data = &net->ipv4.frags.timeout;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8cce0e9ea08c..38befe829caf 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -232,22 +232,19 @@ static void gre_err(struct sk_buff *skb, u32 info)
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct tnl_ptk_info tpi;
- bool csum_err = false;
- if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
- iph->ihl * 4) < 0) {
- if (!csum_err) /* ignore csum errors. */
- return;
- }
+ if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
+ iph->ihl * 4) < 0)
+ return;
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- skb->dev->ifindex, 0, IPPROTO_GRE, 0);
+ skb->dev->ifindex, IPPROTO_GRE);
return;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
- IPPROTO_GRE, 0);
+ ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
+ IPPROTO_GRE);
return;
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 3196cf58f418..35a786c0aaa0 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -531,11 +531,7 @@ static void ip_sublist_rcv_finish(struct list_head *head)
struct sk_buff *skb, *next;
list_for_each_entry_safe(skb, next, head, list) {
- list_del(&skb->list);
- /* Handle ip{6}_forward case, as sch_direct_xmit have
- * another kind of SKB-list usage (see validate_xmit_skb_list)
- */
- skb->next = NULL;
+ skb_list_del_init(skb);
dst_input(skb);
}
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9c4e72e9c60a..c09219e7f230 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -278,7 +278,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
struct sk_buff *nskb = segs->next;
int err;
- segs->next = NULL;
+ skb_mark_not_on_list(segs);
err = ip_fragment(net, sk, segs, mtu, ip_finish_output2);
if (err && ret == 0)
@@ -684,7 +684,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
skb = frag;
frag = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
}
if (err == 0) {
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index f38cb21d773d..de31b302d69c 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -318,9 +318,9 @@ static int vti4_err(struct sk_buff *skb, u32 info)
return 0;
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
- ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+ ipv4_update_pmtu(skb, net, info, 0, protocol);
else
- ipv4_redirect(skb, net, 0, 0, protocol, 0);
+ ipv4_redirect(skb, net, 0, protocol);
xfrm_state_put(x);
return 0;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index d97f4f2787f5..9119d012ba46 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -48,9 +48,9 @@ static int ipcomp4_err(struct sk_buff *skb, u32 info)
return 0;
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
- ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
+ ipv4_update_pmtu(skb, net, info, 0, IPPROTO_COMP);
else
- ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
+ ipv4_redirect(skb, net, 0, IPPROTO_COMP);
xfrm_state_put(x);
return 0;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c891235b4966..e65287c27e3d 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -175,13 +175,12 @@ static int ipip_err(struct sk_buff *skb, u32 info)
}
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
- iph->protocol, 0);
+ ipv4_update_pmtu(skb, net, info, t->parms.link, iph->protocol);
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
+ ipv4_redirect(skb, net, t->parms.link, iph->protocol);
goto out;
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5660adcf7a04..91b0d5671649 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2527,6 +2527,13 @@ errout_free:
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
+ if (cb->strict_check) {
+ int err = ip_valid_fib_dump_req(cb->nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
_ipmr_fill_mroute, &mfc_unres_lock);
}
@@ -2710,6 +2717,31 @@ static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
return true;
}
+static int ipmr_valid_dumplink(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct ifinfomsg *ifm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*ifm))) {
+ NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump");
+ return -EINVAL;
+ }
+
+ ifm = nlmsg_data(nlh);
+ if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
+ ifm->ifi_change || ifm->ifi_index) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -2718,6 +2750,13 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
unsigned int e = 0, s_e;
struct mr_table *mrt;
+ if (cb->strict_check) {
+ int err = ipmr_valid_dumplink(cb->nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
s_t = cb->args[0];
s_e = cb->args[1];
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
index 04311f7067e2..6d218f5a2e71 100644
--- a/net/ipv4/metrics.c
+++ b/net/ipv4/metrics.c
@@ -5,8 +5,8 @@
#include <net/net_namespace.h>
#include <net/tcp.h>
-int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
- u32 *metrics)
+static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
+ int fc_mx_len, u32 *metrics)
{
bool ecn_ca = false;
struct nlattr *nla;
@@ -52,4 +52,28 @@ int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
return 0;
}
-EXPORT_SYMBOL_GPL(ip_metrics_convert);
+
+struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
+ int fc_mx_len)
+{
+ struct dst_metrics *fib_metrics;
+ int err;
+
+ if (!fc_mx)
+ return (struct dst_metrics *)&dst_default_metrics;
+
+ fib_metrics = kzalloc(sizeof(*fib_metrics), GFP_KERNEL);
+ if (unlikely(!fib_metrics))
+ return ERR_PTR(-ENOMEM);
+
+ err = ip_metrics_convert(net, fc_mx, fc_mx_len, fib_metrics->metrics);
+ if (!err) {
+ refcount_set(&fib_metrics->refcnt, 1);
+ } else {
+ kfree(fib_metrics);
+ fib_metrics = ERR_PTR(err);
+ }
+
+ return fib_metrics;
+}
+EXPORT_SYMBOL_GPL(ip_fib_metrics_init);
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 12843c9ef142..0b10d8812828 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -36,7 +36,6 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
const struct net_device *dev, u8 flags)
{
struct fib_result res;
- bool dev_match;
int ret __maybe_unused;
if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
@@ -46,21 +45,7 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL))
return false;
}
- dev_match = false;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- for (ret = 0; ret < res.fi->fib_nhs; ret++) {
- struct fib_nh *nh = &res.fi->fib_nh[ret];
-
- if (nh->nh_dev == dev) {
- dev_match = true;
- break;
- }
- }
-#else
- if (FIB_RES_DEV(res) == dev)
- dev_match = true;
-#endif
- return dev_match || flags & XT_RPFILTER_LOOSE;
+ return fib_info_nh_uses_dev(res.fi, dev) || flags & XT_RPFILTER_LOOSE;
}
static bool
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 6115bf1ff6f0..78a67f961d86 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -264,7 +264,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
return nf_nat_inet_fn(priv, skb, state);
}
-EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn);
static unsigned int
nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index ad3aeff152ed..a9d5e013e555 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -104,12 +104,26 @@ static int masq_device_event(struct notifier_block *this,
return NOTIFY_DONE;
}
+static int inet_cmp(struct nf_conn *ct, void *ptr)
+{
+ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct net_device *dev = ifa->ifa_dev->dev;
+ struct nf_conntrack_tuple *tuple;
+
+ if (!device_cmp(ct, (void *)(long)dev->ifindex))
+ return 0;
+
+ tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+ return ifa->ifa_address == tuple->dst.u3.ip;
+}
+
static int masq_inet_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
- struct netdev_notifier_info info;
+ struct net *net = dev_net(idev->dev);
/* The masq_dev_notifier will catch the case of the device going
* down. So if the inetdev is dead and being destroyed we have
@@ -119,8 +133,10 @@ static int masq_inet_event(struct notifier_block *this,
if (idev->dead)
return NOTIFY_DONE;
- netdev_notifier_info_init(&info, idev->dev);
- return masq_device_event(this, event, &info);
+ if (event == NETDEV_DOWN)
+ nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
+
+ return NOTIFY_DONE;
}
static struct notifier_block masq_dev_notifier = {
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index e50976e3c213..94eb25bc8d7e 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -76,10 +76,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
.flowi4_iif = LOOPBACK_IFINDEX,
};
const struct net_device *oif;
- struct net_device *found;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- int i;
-#endif
+ const struct net_device *found;
/*
* Do not set flowi4_oif, it restricts results (for example, asking
@@ -146,25 +143,13 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (!oif) {
found = FIB_RES_DEV(res);
- goto ok;
- }
-
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- for (i = 0; i < res.fi->fib_nhs; i++) {
- struct fib_nh *nh = &res.fi->fib_nh[i];
+ } else {
+ if (!fib_info_nh_uses_dev(res.fi, oif))
+ return;
- if (nh->nh_dev == oif) {
- found = nh->nh_dev;
- goto ok;
- }
+ found = oif;
}
- return;
-#else
- found = FIB_RES_DEV(res);
- if (found != oif)
- return;
-#endif
-ok:
+
switch (priv->result) {
case NFT_FIB_RESULT_OIF:
*dest = found->ifindex;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 8d7aaf118a30..7ccb5f87f70b 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -779,7 +779,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
if (ipv4_is_multicast(daddr)) {
- if (!ipc.oif)
+ if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
ipc.oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 33df4d76db2d..8ca3eb06ba04 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -608,7 +608,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
tos |= RTO_ONLINK;
if (ipv4_is_multicast(daddr)) {
- if (!ipc.oif)
+ if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
ipc.oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8501554e96a4..c0a9d26c06ce 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1041,17 +1041,15 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
}
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
- int oif, u32 mark, u8 protocol, int flow_flags)
+ int oif, u8 protocol)
{
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
-
- if (!mark)
- mark = IP4_REPLY_MARK(net, skb->mark);
+ u32 mark = IP4_REPLY_MARK(net, skb->mark);
__build_flow_key(net, &fl4, NULL, iph, oif,
- RT_TOS(iph->tos), protocol, mark, flow_flags);
+ RT_TOS(iph->tos), protocol, mark, 0);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_rt_update_pmtu(rt, &fl4, mtu);
@@ -1133,14 +1131,14 @@ out:
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
void ipv4_redirect(struct sk_buff *skb, struct net *net,
- int oif, u32 mark, u8 protocol, int flow_flags)
+ int oif, u8 protocol)
{
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
__build_flow_key(net, &fl4, NULL, iph, oif,
- RT_TOS(iph->tos), protocol, mark, flow_flags);
+ RT_TOS(iph->tos), protocol, 0, 0);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_do_redirect(rt, skb, &fl4, false);
@@ -1220,18 +1218,15 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
src = ip_hdr(skb)->saddr;
else {
struct fib_result res;
- struct flowi4 fl4;
- struct iphdr *iph;
-
- iph = ip_hdr(skb);
-
- memset(&fl4, 0, sizeof(fl4));
- fl4.daddr = iph->daddr;
- fl4.saddr = iph->saddr;
- fl4.flowi4_tos = RT_TOS(iph->tos);
- fl4.flowi4_oif = rt->dst.dev->ifindex;
- fl4.flowi4_iif = skb->dev->ifindex;
- fl4.flowi4_mark = skb->mark;
+ struct iphdr *iph = ip_hdr(skb);
+ struct flowi4 fl4 = {
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowi4_tos = RT_TOS(iph->tos),
+ .flowi4_oif = rt->dst.dev->ifindex,
+ .flowi4_iif = skb->dev->ifindex,
+ .flowi4_mark = skb->mark,
+ };
rcu_read_lock();
if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
@@ -1482,12 +1477,9 @@ void rt_del_uncached_list(struct rtable *rt)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
- struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rtable *rt = (struct rtable *)dst;
- if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
- kfree(p);
-
+ ip_dst_metrics_put(dst);
rt_del_uncached_list(rt);
}
@@ -1534,11 +1526,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
rt->rt_gateway = nh->nh_gw;
rt->rt_uses_gateway = 1;
}
- dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
- if (fi->fib_metrics != &dst_default_metrics) {
- rt->dst._metrics |= DST_METRICS_REFCOUNTED;
- refcount_inc(&fi->fib_metrics->refcnt);
- }
+ ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
+
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
@@ -2786,7 +2775,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct rtable *rt = NULL;
struct sk_buff *skb;
struct rtmsg *rtm;
- struct flowi4 fl4;
+ struct flowi4 fl4 = {};
__be32 dst = 0;
__be32 src = 0;
kuid_t uid;
@@ -2826,7 +2815,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!skb)
return -ENOBUFS;
- memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst;
fl4.saddr = src;
fl4.flowi4_tos = rtm->rtm_tos;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index c3387dfd725b..606f868d9f3f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -88,7 +88,7 @@ u64 cookie_init_timestamp(struct request_sock *req)
ts <<= TSBITS;
ts |= options;
}
- return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ);
+ return (u64)ts * (NSEC_PER_SEC / TCP_TS_HZ);
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 10c6246396cc..43ef83b2330e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1295,7 +1295,7 @@ new_segment:
copy = size_goal;
/* All packets are restored as if they have
- * already been sent. skb_mstamp isn't set to
+ * already been sent. skb_mstamp_ns isn't set to
* avoid wrong rtt estimation.
*/
if (tp->repair)
@@ -1753,6 +1753,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
struct vm_area_struct *vma;
struct sk_buff *skb = NULL;
struct tcp_sock *tp;
+ int inq;
int ret;
if (address & (PAGE_SIZE - 1) || address != zc->address)
@@ -1773,12 +1774,15 @@ static int tcp_zerocopy_receive(struct sock *sk,
tp = tcp_sk(sk);
seq = tp->copied_seq;
- zc->length = min_t(u32, zc->length, tcp_inq(sk));
+ inq = tcp_inq(sk);
+ zc->length = min_t(u32, zc->length, inq);
zc->length &= ~(PAGE_SIZE - 1);
-
- zap_page_range(vma, address, zc->length);
-
- zc->recv_skip_hint = 0;
+ if (zc->length) {
+ zap_page_range(vma, address, zc->length);
+ zc->recv_skip_hint = 0;
+ } else {
+ zc->recv_skip_hint = inq;
+ }
ret = 0;
while (length + PAGE_SIZE <= zc->length) {
if (zc->recv_skip_hint < PAGE_SIZE) {
@@ -1801,8 +1805,17 @@ static int tcp_zerocopy_receive(struct sock *sk,
frags++;
}
}
- if (frags->size != PAGE_SIZE || frags->page_offset)
+ if (frags->size != PAGE_SIZE || frags->page_offset) {
+ int remaining = zc->recv_skip_hint;
+
+ while (remaining && (frags->size != PAGE_SIZE ||
+ frags->page_offset)) {
+ remaining -= frags->size;
+ frags++;
+ }
+ zc->recv_skip_hint -= remaining;
break;
+ }
ret = vm_insert_page(vma, address + length,
skb_frag_page(frags));
if (ret)
@@ -2403,16 +2416,10 @@ adjudge_to_death:
sock_hold(sk);
sock_orphan(sk);
- /* It is the last release_sock in its life. It will remove backlog. */
- release_sock(sk);
-
-
- /* Now socket is owned by kernel and we acquire BH lock
- * to finish close. No need to check for user refs.
- */
local_bh_disable();
bh_lock_sock(sk);
- WARN_ON(sock_owned_by_user(sk));
+ /* remove backlog if any, without releasing ownership. */
+ __release_sock(sk);
percpu_counter_inc(sk->sk_prot->orphan_count);
@@ -2481,6 +2488,7 @@ adjudge_to_death:
out:
bh_unlock_sock(sk);
local_bh_enable();
+ release_sock(sk);
sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
@@ -2595,6 +2603,8 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->compressed_ack = 0;
tp->bytes_sent = 0;
tp->bytes_retrans = 0;
+ tp->duplicate_sack[0].start_seq = 0;
+ tp->duplicate_sack[0].end_seq = 0;
tp->dsack_dups = 0;
tp->reord_seen = 0;
@@ -3894,8 +3904,8 @@ void __init tcp_init(void)
init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
- init_net.ipv4.sysctl_tcp_rmem[1] = 87380;
- init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare);
+ init_net.ipv4.sysctl_tcp_rmem[1] = 131072;
+ init_net.ipv4.sysctl_tcp_rmem[2] = max(131072, max_rshare);
pr_info("Hash tables configured (established %u bind %u)\n",
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 02ff2dde9609..a5786e3e2c16 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -128,6 +128,9 @@ static const u32 bbr_probe_rtt_mode_ms = 200;
/* Skip TSO below the following bandwidth (bits/sec): */
static const int bbr_min_tso_rate = 1200000;
+/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. */
+static const int bbr_pacing_marging_percent = 1;
+
/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
* that will allow a smoothly increasing pacing rate that will double each RTT
* and send the same number of packets per RTT that an un-paced, slow-starting
@@ -208,12 +211,10 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
{
unsigned int mss = tcp_sk(sk)->mss_cache;
- if (!tcp_needs_internal_pacing(sk))
- mss = tcp_mss_to_mtu(sk, mss);
rate *= mss;
rate *= gain;
rate >>= BBR_SCALE;
- rate *= USEC_PER_SEC;
+ rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_marging_percent);
return rate >> BW_SCALE;
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index ca61e2a659e7..cd4814f7e962 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -44,6 +44,7 @@
#include <linux/mm.h>
#include <net/tcp.h>
#include <linux/inet_diag.h>
+#include "tcp_dctcp.h"
#define DCTCP_MAX_ALPHA 1024U
@@ -118,54 +119,6 @@ static u32 dctcp_ssthresh(struct sock *sk)
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
}
-/* Minimal DCTP CE state machine:
- *
- * S: 0 <- last pkt was non-CE
- * 1 <- last pkt was CE
- */
-
-static void dctcp_ce_state_0_to_1(struct sock *sk)
-{
- struct dctcp *ca = inet_csk_ca(sk);
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!ca->ce_state) {
- /* State has changed from CE=0 to CE=1, force an immediate
- * ACK to reflect the new CE state. If an ACK was delayed,
- * send that first to reflect the prior CE state.
- */
- if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
- __tcp_send_ack(sk, ca->prior_rcv_nxt);
- inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
- }
-
- ca->prior_rcv_nxt = tp->rcv_nxt;
- ca->ce_state = 1;
-
- tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
-}
-
-static void dctcp_ce_state_1_to_0(struct sock *sk)
-{
- struct dctcp *ca = inet_csk_ca(sk);
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (ca->ce_state) {
- /* State has changed from CE=1 to CE=0, force an immediate
- * ACK to reflect the new CE state. If an ACK was delayed,
- * send that first to reflect the prior CE state.
- */
- if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
- __tcp_send_ack(sk, ca->prior_rcv_nxt);
- inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
- }
-
- ca->prior_rcv_nxt = tp->rcv_nxt;
- ca->ce_state = 0;
-
- tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
-}
-
static void dctcp_update_alpha(struct sock *sk, u32 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -230,12 +183,12 @@ static void dctcp_state(struct sock *sk, u8 new_state)
static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
{
+ struct dctcp *ca = inet_csk_ca(sk);
+
switch (ev) {
case CA_EVENT_ECN_IS_CE:
- dctcp_ce_state_0_to_1(sk);
- break;
case CA_EVENT_ECN_NO_CE:
- dctcp_ce_state_1_to_0(sk);
+ dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
break;
default:
/* Don't care for the rest. */
diff --git a/net/ipv4/tcp_dctcp.h b/net/ipv4/tcp_dctcp.h
new file mode 100644
index 000000000000..d69a77cbd0c7
--- /dev/null
+++ b/net/ipv4/tcp_dctcp.h
@@ -0,0 +1,40 @@
+#ifndef _TCP_DCTCP_H
+#define _TCP_DCTCP_H
+
+static inline void dctcp_ece_ack_cwr(struct sock *sk, u32 ce_state)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (ce_state == 1)
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ else
+ tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+}
+
+/* Minimal DCTCP CE state machine:
+ *
+ * S: 0 <- last pkt was non-CE
+ * 1 <- last pkt was CE
+ */
+static inline void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
+ u32 *prior_rcv_nxt, u32 *ce_state)
+{
+ u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
+
+ if (*ce_state != new_ce_state) {
+ /* CE state has changed, force an immediate ACK to
+ * reflect the new CE state. If an ACK was delayed,
+ * send that first to reflect the prior CE state.
+ */
+ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
+ dctcp_ece_ack_cwr(sk, *ce_state);
+ __tcp_send_ack(sk, *prior_rcv_nxt);
+ }
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+ }
+ *prior_rcv_nxt = tcp_sk(sk)->rcv_nxt;
+ *ce_state = new_ce_state;
+ dctcp_ece_ack_cwr(sk, new_ce_state);
+}
+
+#endif
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 47e08c1b5bc3..188980c58f87 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -426,26 +426,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
}
}
-/* 3. Tuning rcvbuf, when connection enters established state. */
-static void tcp_fixup_rcvbuf(struct sock *sk)
-{
- u32 mss = tcp_sk(sk)->advmss;
- int rcvmem;
-
- rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
- tcp_default_init_rwnd(mss);
-
- /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
- * Allow enough cushion so that sender is not limited by our window
- */
- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
- rcvmem <<= 2;
-
- if (sk->sk_rcvbuf < rcvmem)
- sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
-}
-
-/* 4. Try to fixup all. It is made immediately after connection enters
+/* 3. Try to fixup all. It is made immediately after connection enters
* established state.
*/
void tcp_init_buffer_space(struct sock *sk)
@@ -454,12 +435,10 @@ void tcp_init_buffer_space(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
- if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
- tcp_fixup_rcvbuf(sk);
if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
tcp_sndbuf_expand(sk);
- tp->rcvq_space.space = tp->rcv_wnd;
+ tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss);
tcp_mstamp_refresh(tp);
tp->rcvq_space.time = tp->tcp_mstamp;
tp->rcvq_space.seq = tp->copied_seq;
@@ -485,7 +464,7 @@ void tcp_init_buffer_space(struct sock *sk)
tp->snd_cwnd_stamp = tcp_jiffies32;
}
-/* 5. Recalculate window clamp after socket hit its memory bounds. */
+/* 4. Recalculate window clamp after socket hit its memory bounds. */
static void tcp_clamp_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1305,7 +1284,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
*/
tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
start_seq, end_seq, dup_sack, pcount,
- skb->skb_mstamp);
+ tcp_skb_timestamp_us(skb));
tcp_rate_skb_delivered(sk, skb, state->rate);
if (skb == tp->lost_skb_hint)
@@ -1580,7 +1559,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
TCP_SKB_CB(skb)->end_seq,
dup_sack,
tcp_skb_pcount(skb),
- skb->skb_mstamp);
+ tcp_skb_timestamp_us(skb));
tcp_rate_skb_delivered(sk, skb, state->rate);
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
list_del_init(&skb->tcp_tsorted_anchor);
@@ -3103,7 +3082,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
tp->retrans_out -= acked_pcount;
flag |= FLAG_RETRANS_DATA_ACKED;
} else if (!(sacked & TCPCB_SACKED_ACKED)) {
- last_ackt = skb->skb_mstamp;
+ last_ackt = tcp_skb_timestamp_us(skb);
WARN_ON_ONCE(last_ackt == 0);
if (!first_ackt)
first_ackt = last_ackt;
@@ -3121,7 +3100,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, sacked, scb->end_seq,
- skb->skb_mstamp);
+ tcp_skb_timestamp_us(skb));
}
if (sacked & TCPCB_LOST)
tp->lost_out -= acked_pcount;
@@ -3215,7 +3194,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
}
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
- sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
+ sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
+ tcp_skb_timestamp_us(skb))) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
* after when the head was last (re)transmitted. Otherwise the
* timeout may continue to extend in loss recovery.
@@ -4199,6 +4179,17 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
}
+static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
+{
+ /* When the ACK path fails or drops most ACKs, the sender would
+ * timeout and spuriously retransmit the same segment repeatedly.
+ * The receiver remembers and reflects via DSACKs. Leverage the
+ * DSACK state and change the txhash to re-route speculatively.
+ */
+ if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq)
+ sk_rethink_txhash(sk);
+}
+
static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -4211,6 +4202,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+ tcp_rcv_spurious_retrans(sk, skb);
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
end_seq = tp->rcv_nxt;
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
@@ -4755,6 +4747,7 @@ queue_and_out:
}
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+ tcp_rcv_spurious_retrans(sk, skb);
/* A retransmit, 2nd most common case. Force an immediate ack. */
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd426313a298..de47038afdf0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -544,7 +544,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
BUG_ON(!skb);
tcp_mstamp_refresh(tp);
- delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
+ delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
remaining = icsk->icsk_rto -
usecs_to_jiffies(delta_us);
@@ -2551,7 +2551,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_tw_reuse = 2;
cnt = tcp_hashinfo.ehash_mask + 1;
- net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
+ net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 597dbd749f05..059b67af28b1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -45,6 +45,22 @@
#include <trace/events/tcp.h>
+/* Refresh clocks of a TCP socket,
+ * ensuring monotonically increasing values.
+ */
+void tcp_mstamp_refresh(struct tcp_sock *tp)
+{
+ u64 val = tcp_clock_ns();
+
+ /* departure time for next data packet */
+ if (val > tp->tcp_wstamp_ns)
+ tp->tcp_wstamp_ns = val;
+
+ val = div_u64(val, NSEC_PER_USEC);
+ if (val > tp->tcp_mstamp)
+ tp->tcp_mstamp = val;
+}
+
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
@@ -179,21 +195,6 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
-
-u32 tcp_default_init_rwnd(u32 mss)
-{
- /* Initial receive window should be twice of TCP_INIT_CWND to
- * enable proper sending of new unsent data during fast recovery
- * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a
- * limit when mss is larger than 1460.
- */
- u32 init_rwnd = TCP_INIT_CWND * 2;
-
- if (mss > 1460)
- init_rwnd = max((1460 * init_rwnd) / mss, 2U);
- return init_rwnd;
-}
-
/* Determine a window scaling and initial window to offer.
* Based on the assumption that the given amount of space
* will be offered. Store the results in the tp structure.
@@ -228,7 +229,10 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
- (*rcv_wnd) = space;
+ (*rcv_wnd) = min_t(u32, space, U16_MAX);
+
+ if (init_rcv_wnd)
+ *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
(*rcv_wscale) = 0;
if (wscale_ok) {
@@ -241,11 +245,6 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
(*rcv_wscale)++;
}
}
-
- if (!init_rcv_wnd) /* Use default unless specified otherwise */
- init_rcv_wnd = tcp_default_init_rwnd(mss);
- *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
-
/* Set the clamp no higher than max representable value */
(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
}
@@ -977,28 +976,34 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
+static void tcp_internal_pacing(struct sock *sk)
{
- u64 len_ns;
- u32 rate;
-
if (!tcp_needs_internal_pacing(sk))
return;
- rate = sk->sk_pacing_rate;
- if (!rate || rate == ~0U)
- return;
-
- len_ns = (u64)skb->len * NSEC_PER_SEC;
- do_div(len_ns, rate);
hrtimer_start(&tcp_sk(sk)->pacing_timer,
- ktime_add_ns(ktime_get(), len_ns),
+ ns_to_ktime(tcp_sk(sk)->tcp_wstamp_ns),
HRTIMER_MODE_ABS_PINNED_SOFT);
sock_hold(sk);
}
-static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb)
{
- skb->skb_mstamp = tp->tcp_mstamp;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
+ if (sk->sk_pacing_status != SK_PACING_NONE) {
+ u32 rate = sk->sk_pacing_rate;
+
+ /* Original sch_fq does not pace first 10 MSS
+ * Note that tp->data_segs_out overflows after 2^32 packets,
+ * this is a minor annoyance.
+ */
+ if (rate != ~0U && rate && tp->data_segs_out >= 10) {
+ tp->tcp_wstamp_ns += div_u64((u64)skb->len * NSEC_PER_SEC, rate);
+
+ tcp_internal_pacing(sk);
+ }
+ }
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
}
@@ -1045,7 +1050,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
if (unlikely(!skb))
return -ENOBUFS;
}
- skb->skb_mstamp = tp->tcp_mstamp;
+ skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
inet = inet_sk(sk);
tcb = TCP_SKB_CB(skb);
@@ -1137,7 +1142,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tcp_event_data_sent(tp, sk);
tp->data_segs_out += tcp_skb_pcount(skb);
tp->bytes_sent += skb->len - tcp_header_size;
- tcp_internal_pacing(sk, skb);
}
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
@@ -1149,8 +1153,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
- /* Our usage of tstamp should remain private */
- skb->tstamp = 0;
+ /* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */
/* Cleanup our debris for IP stacks */
memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
@@ -1163,7 +1166,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
err = net_xmit_eval(err);
}
if (!err && oskb) {
- tcp_update_skb_after_send(tp, oskb);
+ tcp_update_skb_after_send(sk, oskb);
tcp_rate_skb_sent(sk, oskb);
}
return err;
@@ -1966,7 +1969,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
head = tcp_rtx_queue_head(sk);
if (!head)
goto send_now;
- age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
+ age = tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(head));
/* If next ACK is likely to come too late (half srtt), do not defer */
if (age < (tp->srtt_us >> 4))
goto send_now;
@@ -2312,7 +2315,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp" is used as a start point for the retransmit timer */
- tcp_update_skb_after_send(tp, skb);
+ tcp_update_skb_after_send(sk, skb);
goto repair; /* Skip network transmission */
}
@@ -2887,7 +2890,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
} tcp_skb_tsorted_restore(skb);
if (!err) {
- tcp_update_skb_after_send(tp, skb);
+ tcp_update_skb_after_send(sk, skb);
tcp_rate_skb_sent(sk, skb);
}
} else {
@@ -3205,10 +3208,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
- skb->skb_mstamp = cookie_init_timestamp(req);
+ skb->skb_mstamp_ns = cookie_init_timestamp(req);
else
#endif
- skb->skb_mstamp = tcp_clock_us();
+ skb->skb_mstamp_ns = tcp_clock_ns();
#ifdef CONFIG_TCP_MD5SIG
rcu_read_lock();
@@ -3424,7 +3427,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
- syn->skb_mstamp = syn_data->skb_mstamp;
+ syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
/* Now full SYN+DATA was cloned and sent (or not),
* remove the SYN from the original skb (syn_data)
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 4dff40dad4dc..baed2186c7c6 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -55,8 +55,10 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
* bandwidth estimate.
*/
if (!tp->packets_out) {
- tp->first_tx_mstamp = skb->skb_mstamp;
- tp->delivered_mstamp = skb->skb_mstamp;
+ u64 tstamp_us = tcp_skb_timestamp_us(skb);
+
+ tp->first_tx_mstamp = tstamp_us;
+ tp->delivered_mstamp = tstamp_us;
}
TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp;
@@ -88,13 +90,12 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
rs->is_app_limited = scb->tx.is_app_limited;
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
+ /* Record send time of most recently ACKed packet: */
+ tp->first_tx_mstamp = tcp_skb_timestamp_us(skb);
/* Find the duration of the "send phase" of this window: */
- rs->interval_us = tcp_stamp_us_delta(
- skb->skb_mstamp,
- scb->tx.first_tx_mstamp);
+ rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
+ scb->tx.first_tx_mstamp);
- /* Record send time of most recently ACKed packet: */
- tp->first_tx_mstamp = skb->skb_mstamp;
}
/* Mark off the skb delivered once it's sacked to avoid being
* used again when it's cumulatively acked. For acked packets
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index c81aadff769b..fdb715bdd2d1 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -50,7 +50,7 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd)
{
return tp->rack.rtt_us + reo_wnd -
- tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
+ tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(skb));
}
/* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
@@ -91,7 +91,8 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
!(scb->sacked & TCPCB_SACKED_RETRANS))
continue;
- if (!tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
+ if (!tcp_rack_sent_after(tp->rack.mstamp,
+ tcp_skb_timestamp_us(skb),
tp->rack.end_seq, scb->end_seq))
break;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7fdf222a0bdf..61023d50cd60 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -360,7 +360,7 @@ static void tcp_probe_timer(struct sock *sk)
*/
start_ts = tcp_skb_timestamp(skb);
if (!start_ts)
- skb->skb_mstamp = tp->tcp_mstamp;
+ skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
else if (icsk->icsk_user_timeout &&
(s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
goto abort;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c32a4c16b7ff..cf8252d05a01 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1042,7 +1042,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
if (ipv4_is_multicast(daddr)) {
- if (!ipc.oif)
+ if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
ipc.oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
@@ -1889,7 +1889,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return 0;
}
-static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
void udp_encap_enable(void)
{
static_branch_enable(&udp_encap_needed_key);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0c0522b79b43..802f2bc00d69 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -405,7 +405,7 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
{
struct udphdr *uh = udp_gro_udphdr(skb);
- if (unlikely(!uh))
+ if (unlikely(!uh) || !static_branch_unlikely(&udp_encap_needed_key))
goto flush;
/* Don't bother verifying checksum if we're going to flush anyway. */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c63ccce6425f..2496b12bf721 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -666,6 +666,7 @@ errout:
static int inet6_netconf_dump_devconf(struct sk_buff *skb,
struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
int h, s_h;
int idx, s_idx;
@@ -673,6 +674,21 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
struct inet6_dev *idev;
struct hlist_head *head;
+ if (cb->strict_check) {
+ struct netlink_ext_ack *extack = cb->extack;
+ struct netconfmsg *ncm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request");
+ return -EINVAL;
+ }
+ }
+
s_h = cb->args[0];
s_idx = idx = cb->args[1];
@@ -692,7 +708,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
if (inet6_netconf_fill_devconf(skb, dev->ifindex,
&idev->cnf,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNETCONF,
NLM_F_MULTI,
NETCONFA_ALL) < 0) {
@@ -709,7 +725,7 @@ cont:
if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNETCONF, NLM_F_MULTI,
NETCONFA_ALL) < 0)
goto done;
@@ -720,7 +736,7 @@ cont:
if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNETCONF, NLM_F_MULTI,
NETCONFA_ALL) < 0)
goto done;
@@ -997,6 +1013,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
if (addr_type == IPV6_ADDR_ANY ||
addr_type & IPV6_ADDR_MULTICAST ||
(!(idev->dev->flags & IFF_LOOPBACK) &&
+ !netif_is_l3_master(idev->dev) &&
addr_type & IPV6_ADDR_LOOPBACK))
return ERR_PTR(-EADDRNOTAVAIL);
@@ -4489,6 +4506,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
[IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
[IFA_FLAGS] = { .len = sizeof(u32) },
[IFA_RT_PRIORITY] = { .len = sizeof(u32) },
+ [IFA_TARGET_NETNSID] = { .type = NLA_S32 },
};
static int
@@ -4791,19 +4809,39 @@ static inline int inet6_ifaddr_msgsize(void)
+ nla_total_size(4) /* IFA_RT_PRIORITY */;
}
+enum addr_type_t {
+ UNICAST_ADDR,
+ MULTICAST_ADDR,
+ ANYCAST_ADDR,
+};
+
+struct inet6_fill_args {
+ u32 portid;
+ u32 seq;
+ int event;
+ unsigned int flags;
+ int netnsid;
+ enum addr_type_t type;
+};
+
static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
- u32 portid, u32 seq, int event, unsigned int flags)
+ struct inet6_fill_args *args)
{
struct nlmsghdr *nlh;
u32 preferred, valid;
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
+ sizeof(struct ifaddrmsg), args->flags);
if (!nlh)
return -EMSGSIZE;
put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
ifa->idev->dev->ifindex);
+ if (args->netnsid >= 0 &&
+ nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+ goto error;
+
if (!((ifa->flags&IFA_F_PERMANENT) &&
(ifa->prefered_lft == INFINITY_LIFE_TIME))) {
preferred = ifa->prefered_lft;
@@ -4853,7 +4891,7 @@ error:
}
static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
- u32 portid, u32 seq, int event, u16 flags)
+ struct inet6_fill_args *args)
{
struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
@@ -4862,10 +4900,15 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
+ sizeof(struct ifaddrmsg), args->flags);
if (!nlh)
return -EMSGSIZE;
+ if (args->netnsid >= 0 &&
+ nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+ return -EMSGSIZE;
+
put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 ||
put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
@@ -4879,7 +4922,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
}
static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
- u32 portid, u32 seq, int event, unsigned int flags)
+ struct inet6_fill_args *args)
{
struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
int ifindex = dev ? dev->ifindex : 1;
@@ -4889,10 +4932,15 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
+ sizeof(struct ifaddrmsg), args->flags);
if (!nlh)
return -EMSGSIZE;
+ if (args->netnsid >= 0 &&
+ nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+ return -EMSGSIZE;
+
put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 ||
put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
@@ -4905,16 +4953,11 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
return 0;
}
-enum addr_type_t {
- UNICAST_ADDR,
- MULTICAST_ADDR,
- ANYCAST_ADDR,
-};
-
/* called with rcu_read_lock() */
static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
- struct netlink_callback *cb, enum addr_type_t type,
- int s_ip_idx, int *p_ip_idx)
+ struct netlink_callback *cb,
+ int s_ip_idx, int *p_ip_idx,
+ struct inet6_fill_args *fillargs)
{
struct ifmcaddr6 *ifmca;
struct ifacaddr6 *ifaca;
@@ -4922,19 +4965,16 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
int ip_idx = *p_ip_idx;
read_lock_bh(&idev->lock);
- switch (type) {
+ switch (fillargs->type) {
case UNICAST_ADDR: {
struct inet6_ifaddr *ifa;
+ fillargs->event = RTM_NEWADDR;
/* unicast address incl. temp addr */
list_for_each_entry(ifa, &idev->addr_list, if_list) {
if (++ip_idx < s_ip_idx)
continue;
- err = inet6_fill_ifaddr(skb, ifa,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWADDR,
- NLM_F_MULTI);
+ err = inet6_fill_ifaddr(skb, ifa, fillargs);
if (err < 0)
break;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -4942,31 +4982,26 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
break;
}
case MULTICAST_ADDR:
+ fillargs->event = RTM_GETMULTICAST;
+
/* multicast address */
for (ifmca = idev->mc_list; ifmca;
ifmca = ifmca->next, ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
- err = inet6_fill_ifmcaddr(skb, ifmca,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_GETMULTICAST,
- NLM_F_MULTI);
+ err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
if (err < 0)
break;
}
break;
case ANYCAST_ADDR:
+ fillargs->event = RTM_GETANYCAST;
/* anycast address */
for (ifaca = idev->ac_list; ifaca;
ifaca = ifaca->aca_next, ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
- err = inet6_fill_ifacaddr(skb, ifaca,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_GETANYCAST,
- NLM_F_MULTI);
+ err = inet6_fill_ifacaddr(skb, ifaca, fillargs);
if (err < 0)
break;
}
@@ -4979,10 +5014,71 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
return err;
}
+/* Validate an address dump request received with strict checking enabled.
+ *
+ * The message must carry a complete struct ifaddrmsg whose prefixlen, flags,
+ * scope and index fields are all zero. The only attribute accepted is
+ * IFA_TARGET_NETNSID: its value is stored in fillargs->netnsid and the
+ * namespace returned by rtnl_get_net_ns_capable() is handed back through
+ * *tgt_net (the caller releases it with put_net() when netnsid >= 0).
+ *
+ * Returns 0 on success or a negative errno with a message set in extack.
+ * On failure no namespace obtained here stays referenced, and *tgt_net and
+ * fillargs->netnsid are left as they were on entry.
+ */
+static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
+				       struct inet6_fill_args *fillargs,
+				       struct net **tgt_net, struct sock *sk,
+				       struct netlink_ext_ack *extack)
+{
+	struct net *prev_net = *tgt_net;
+	int prev_nsid = fillargs->netnsid;
+	struct nlattr *tb[IFA_MAX+1];
+	struct net *held = NULL;
+	struct ifaddrmsg *ifm;
+	int err, i;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for address dump request");
+		return -EINVAL;
+	}
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address dump request");
+		return -EINVAL;
+	}
+	if (ifm->ifa_index) {
+		NL_SET_ERR_MSG_MOD(extack, "Filter by device index not supported for address dump");
+		return -EINVAL;
+	}
+
+	err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+				 ifa_ipv6_policy, extack);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i <= IFA_MAX; ++i) {
+		if (!tb[i])
+			continue;
+
+		if (i == IFA_TARGET_NETNSID) {
+			struct net *net;
+
+			fillargs->netnsid = nla_get_s32(tb[i]);
+			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
+			if (IS_ERR(net)) {
+				NL_SET_ERR_MSG_MOD(extack, "Invalid target network namespace id");
+				err = PTR_ERR(net);
+				goto err_restore;
+			}
+			held = net;
+			*tgt_net = net;
+		} else {
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request");
+			err = -EINVAL;
+			goto err_restore;
+		}
+	}
+
+	return 0;
+
+err_restore:
+	/* The caller returns straight away on error without calling
+	 * put_net(), so a namespace already looked up for
+	 * IFA_TARGET_NETNSID has to be released here.
+	 */
+	if (held)
+		put_net(held);
+	*tgt_net = prev_net;
+	fillargs->netnsid = prev_nsid;
+	return err;
+}
+
+
static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
enum addr_type_t type)
{
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct inet6_fill_args fillargs = {
+ .portid = NETLINK_CB(cb->skb).portid,
+ .seq = cb->nlh->nlmsg_seq,
+ .flags = NLM_F_MULTI,
+ .netnsid = -1,
+ .type = type,
+ };
struct net *net = sock_net(skb->sk);
+ struct net *tgt_net = net;
int h, s_h;
int idx, ip_idx;
int s_idx, s_ip_idx;
@@ -4994,11 +5090,20 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
s_idx = idx = cb->args[1];
s_ip_idx = ip_idx = cb->args[2];
+ if (cb->strict_check) {
+ int err;
+
+ err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
+ skb->sk, cb->extack);
+ if (err < 0)
+ return err;
+ }
+
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq;
+ cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq;
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
- head = &net->dev_index_head[h];
+ head = &tgt_net->dev_index_head[h];
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -5009,8 +5114,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
if (!idev)
goto cont;
- if (in6_dump_addrs(idev, skb, cb, type,
- s_ip_idx, &ip_idx) < 0)
+ if (in6_dump_addrs(idev, skb, cb, s_ip_idx, &ip_idx,
+ &fillargs) < 0)
goto done;
cont:
idx++;
@@ -5021,6 +5126,8 @@ done:
cb->args[0] = h;
cb->args[1] = idx;
cb->args[2] = ip_idx;
+ if (fillargs.netnsid >= 0)
+ put_net(tgt_net);
return skb->len;
}
@@ -5051,6 +5158,14 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(in_skb->sk);
+ struct inet6_fill_args fillargs = {
+ .portid = NETLINK_CB(in_skb).portid,
+ .seq = nlh->nlmsg_seq,
+ .event = RTM_NEWADDR,
+ .flags = 0,
+ .netnsid = -1,
+ };
+ struct net *tgt_net = net;
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *addr = NULL, *peer;
@@ -5064,15 +5179,24 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (tb[IFA_TARGET_NETNSID]) {
+ fillargs.netnsid = nla_get_s32(tb[IFA_TARGET_NETNSID]);
+
+ tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(in_skb).sk,
+ fillargs.netnsid);
+ if (IS_ERR(tgt_net))
+ return PTR_ERR(tgt_net);
+ }
+
addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
if (!addr)
return -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
- dev = dev_get_by_index(net, ifm->ifa_index);
+ dev = dev_get_by_index(tgt_net, ifm->ifa_index);
- ifa = ipv6_get_ifaddr(net, addr, dev, 1);
+ ifa = ipv6_get_ifaddr(tgt_net, addr, dev, 1);
if (!ifa) {
err = -EADDRNOTAVAIL;
goto errout;
@@ -5084,20 +5208,22 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout_ifa;
}
- err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, RTM_NEWADDR, 0);
+ err = inet6_fill_ifaddr(skb, ifa, &fillargs);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout_ifa;
}
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+ err = rtnl_unicast(skb, tgt_net, NETLINK_CB(in_skb).portid);
errout_ifa:
in6_ifa_put(ifa);
errout:
if (dev)
dev_put(dev);
+ if (fillargs.netnsid >= 0)
+ put_net(tgt_net);
+
return err;
}
@@ -5105,13 +5231,20 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
{
struct sk_buff *skb;
struct net *net = dev_net(ifa->idev->dev);
+ struct inet6_fill_args fillargs = {
+ .portid = 0,
+ .seq = 0,
+ .event = event,
+ .flags = 0,
+ .netnsid = -1,
+ };
int err = -ENOBUFS;
skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
if (!skb)
goto errout;
- err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
+ err = inet6_fill_ifaddr(skb, ifa, &fillargs);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
WARN_ON(err == -EMSGSIZE);
@@ -5527,6 +5660,31 @@ nla_put_failure:
return -EMSGSIZE;
}
+/* Validate a strict-mode link dump request: the message must hold exactly
+ * one struct ifinfomsg, with no trailing data and every checked header
+ * field zero. Returns 0 when valid, -EINVAL (with an extack message)
+ * otherwise.
+ */
+static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
+				   struct netlink_ext_ack *extack)
+{
+	const struct ifinfomsg *hdr;
+	bool hdr_is_zero;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct ifinfomsg))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for link dump request");
+		return -EINVAL;
+	}
+
+	if (nlmsg_attrlen(nlh, sizeof(struct ifinfomsg)) != 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid data after header");
+		return -EINVAL;
+	}
+
+	hdr = nlmsg_data(nlh);
+	hdr_is_zero = !hdr->__ifi_pad && !hdr->ifi_type && !hdr->ifi_flags &&
+		      !hdr->ifi_change && !hdr->ifi_index;
+	if (hdr_is_zero)
+		return 0;
+
+	NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for dump request");
+	return -EINVAL;
+}
+
+
static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -5536,6 +5694,16 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct inet6_dev *idev;
struct hlist_head *head;
+ /* only requests using strict checking can pass data to
+ * influence the dump
+ */
+ if (cb->strict_check) {
+ int err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
s_h = cb->args[0];
s_idx = cb->args[1];
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 1d6ced37ad71..0d1ee82ee55b 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -458,20 +458,52 @@ static int ip6addrlbl_fill(struct sk_buff *skb,
return 0;
}
+/* Validate a strict-mode address label dump request: the message must hold
+ * a complete struct ifaddrlblmsg with all checked fields zero and nothing
+ * after the header. Returns 0 when valid, -EINVAL (with an extack message)
+ * otherwise.
+ */
+static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh,
+				     struct netlink_ext_ack *extack)
+{
+	struct ifaddrlblmsg *ifal;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for address label dump request");
+		return -EINVAL;
+	}
+
+	ifal = nlmsg_data(nlh);
+	if (ifal->__ifal_reserved || ifal->ifal_prefixlen ||
+	    ifal->ifal_flags || ifal->ifal_index || ifal->ifal_seq) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address label dump request");
+		return -EINVAL;
+	}
+
+	if (nlmsg_attrlen(nlh, sizeof(*ifal))) {
+		/* Message text is returned to user space via extack. */
+		NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct ip6addrlbl_entry *p;
int idx = 0, s_idx = cb->args[0];
int err;
+ if (cb->strict_check) {
+ err = ip6addrlbl_valid_dump_req(nlh, cb->extack);
+ if (err < 0)
+ return err;
+ }
+
rcu_read_lock();
hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
if (idx >= s_idx) {
err = ip6addrlbl_fill(skb, p,
net->ipv6.ip6addrlbl_table.seq,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWADDRLABEL,
NLM_F_MULTI);
if (err < 0)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9a4261e50272..e9c8cfdf4b4c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -209,6 +209,7 @@ lookup_protocol:
np->hop_limit = -1;
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
+ np->mc_all = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
np->repflow = net->ipv6.sysctl.flowlabel_reflect;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
@@ -467,12 +468,10 @@ void inet6_destroy_sock(struct sock *sk)
/* Release rx options */
skb = xchg(&np->pktoptions, NULL);
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
skb = xchg(&np->rxpmtu, NULL);
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
/* Free flowlabels */
fl6_free_socklist(sk);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 88a7579c23bd..63b2b66f9dfa 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -601,12 +601,11 @@ static void esp_input_done_esn(struct crypto_async_request *base, int err)
static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
{
- struct ip_esp_hdr *esph;
struct crypto_aead *aead = x->data;
struct aead_request *req;
struct sk_buff *trailer;
int ivlen = crypto_aead_ivsize(aead);
- int elen = skb->len - sizeof(*esph) - ivlen;
+ int elen = skb->len - sizeof(struct ip_esp_hdr) - ivlen;
int nfrags;
int assoclen;
int seqhilen;
@@ -616,7 +615,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
u8 *iv;
struct scatterlist *sg;
- if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) {
+ if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + ivlen)) {
ret = -EINVAL;
goto out;
}
@@ -626,7 +625,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
goto out;
}
- assoclen = sizeof(*esph);
+ assoclen = sizeof(struct ip_esp_hdr);
seqhilen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index cbe46175bb59..0783af11b0b7 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -29,6 +29,7 @@
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
@@ -46,6 +47,7 @@ struct fib6_cleaner {
int (*func)(struct fib6_info *, void *arg);
int sernum;
void *arg;
+ bool skip_notify;
};
#ifdef CONFIG_IPV6_SUBTREES
@@ -160,8 +162,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
}
INIT_LIST_HEAD(&f6i->fib6_siblings);
- f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
-
atomic_inc(&f6i->fib6_ref);
return f6i;
@@ -171,7 +171,6 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
{
struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
struct rt6_exception_bucket *bucket;
- struct dst_metrics *m;
WARN_ON(f6i->fib6_node);
@@ -205,9 +204,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
if (f6i->fib6_nh.nh_dev)
dev_put(f6i->fib6_nh.nh_dev);
- m = f6i->fib6_metrics;
- if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
- kfree(m);
+ ip_fib_metrics_put(f6i->fib6_metrics);
kfree(f6i);
}
@@ -570,6 +567,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int h, s_h;
unsigned int e = 0, s_e;
@@ -579,6 +577,13 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
struct hlist_head *head;
int res = 0;
+ if (cb->strict_check) {
+ int err = ip_valid_fib_dump_req(nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
s_h = cb->args[0];
s_e = cb->args[1];
@@ -1954,6 +1959,7 @@ static int fib6_clean_node(struct fib6_walker *w)
struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
struct nl_info info = {
.nl_net = c->net,
+ .skip_notify = c->skip_notify,
};
if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
@@ -2005,7 +2011,7 @@ static int fib6_clean_node(struct fib6_walker *w)
static void fib6_clean_tree(struct net *net, struct fib6_node *root,
int (*func)(struct fib6_info *, void *arg),
- int sernum, void *arg)
+ int sernum, void *arg, bool skip_notify)
{
struct fib6_cleaner c;
@@ -2017,13 +2023,14 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
c.sernum = sernum;
c.arg = arg;
c.net = net;
+ c.skip_notify = skip_notify;
fib6_walk(net, &c.w);
}
static void __fib6_clean_all(struct net *net,
int (*func)(struct fib6_info *, void *),
- int sernum, void *arg)
+ int sernum, void *arg, bool skip_notify)
{
struct fib6_table *table;
struct hlist_head *head;
@@ -2035,7 +2042,7 @@ static void __fib6_clean_all(struct net *net,
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
spin_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
- func, sernum, arg);
+ func, sernum, arg, skip_notify);
spin_unlock_bh(&table->tb6_lock);
}
}
@@ -2045,14 +2052,21 @@ static void __fib6_clean_all(struct net *net,
void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
void *arg)
{
- __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
+ __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
+}
+
+/* Variant of fib6_clean_all(): runs the same __fib6_clean_all() walk with
+ * FIB6_NO_SERNUM_CHANGE, but passes skip_notify = true, which
+ * fib6_clean_node() copies into the nl_info it builds.
+ */
+void fib6_clean_all_skip_notify(struct net *net,
+ int (*func)(struct fib6_info *, void *),
+ void *arg)
+{
+ __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
}
static void fib6_flush_trees(struct net *net)
{
int new_sernum = fib6_new_sernum(net);
- __fib6_clean_all(net, NULL, new_sernum, NULL);
+ __fib6_clean_all(net, NULL, new_sernum, NULL, false);
}
/*
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index e493b041d4ac..515adbdba1d2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -427,35 +427,17 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
- const struct gre_base_hdr *greh;
const struct ipv6hdr *ipv6h;
- int grehlen = sizeof(*greh);
+ struct tnl_ptk_info tpi;
struct ip6_tnl *t;
- int key_off = 0;
- __be16 flags;
- __be32 key;
- if (!pskb_may_pull(skb, offset + grehlen))
- return;
- greh = (const struct gre_base_hdr *)(skb->data + offset);
- flags = greh->flags;
- if (flags & (GRE_VERSION | GRE_ROUTING))
+ if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IPV6),
+ offset) < 0)
return;
- if (flags & GRE_CSUM)
- grehlen += 4;
- if (flags & GRE_KEY) {
- key_off = grehlen + offset;
- grehlen += 4;
- }
- if (!pskb_may_pull(skb, offset + grehlen))
- return;
ipv6h = (const struct ipv6hdr *)skb->data;
- greh = (const struct gre_base_hdr *)(skb->data + offset);
- key = key_off ? *(__be32 *)(skb->data + key_off) : 0;
-
t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
- key, greh->protocol);
+ tpi.key, tpi.proto);
if (!t)
return;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 6242682be876..96577e742afd 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -178,7 +178,8 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
*/
if ((ipv6_addr_loopback(&hdr->saddr) ||
ipv6_addr_loopback(&hdr->daddr)) &&
- !(dev->flags & IFF_LOOPBACK))
+ !(dev->flags & IFF_LOOPBACK) &&
+ !netif_is_l3_master(dev))
goto err;
/* RFC4291 Errata ID: 3480
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f9f8f554d141..89e0d5118afe 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -725,7 +725,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
skb = frag;
frag = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
}
kfree(tmp_hdr);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index d0b7e0249c13..d7563ef76518 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -85,7 +85,8 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr_table *mrt);
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache);
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
@@ -138,6 +139,9 @@ static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
.flags = FIB_LOOKUP_NOREF,
};
+ /* update flow if oif or iif point to device enslaved to l3mdev */
+ l3mdev_update_flow(net, flowi6_to_flowi(flp6));
+
err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
flowi6_to_flowi(flp6), 0, &arg);
if (err < 0)
@@ -164,7 +168,9 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
return -EINVAL;
}
- mrt = ip6mr_get_table(rule->fr_net, rule->table);
+ arg->table = fib_rule_get_table(rule, arg);
+
+ mrt = ip6mr_get_table(rule->fr_net, arg->table);
if (!mrt)
return -EAGAIN;
res->mrt = mrt;
@@ -1014,7 +1020,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
}
rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else
- ip6_mr_forward(net, mrt, skb, c);
+ ip6_mr_forward(net, mrt, skb->dev, skb, c);
}
}
@@ -1120,7 +1126,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
/* Queue a packet for resolution. It gets locked cache entry! */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
- struct sk_buff *skb)
+ struct sk_buff *skb, struct net_device *dev)
{
struct mfc6_cache *c;
bool found = false;
@@ -1180,6 +1186,10 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
kfree_skb(skb);
err = -ENOBUFS;
} else {
+ if (dev) {
+ skb->dev = dev;
+ skb->skb_iif = dev->ifindex;
+ }
skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -2043,11 +2053,12 @@ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
}
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *c)
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc6_cache *c)
{
int psend = -1;
int vif, ct;
- int true_vifi = ip6mr_find_vif(mrt, skb->dev);
+ int true_vifi = ip6mr_find_vif(mrt, dev);
vif = c->_c.mfc_parent;
c->_c.mfc_un.res.pkt++;
@@ -2073,7 +2084,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif_table[vif].dev != skb->dev) {
+ if (mrt->vif_table[vif].dev != dev) {
c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
@@ -2154,6 +2165,19 @@ int ip6_mr_input(struct sk_buff *skb)
.flowi6_mark = skb->mark,
};
int err;
+ struct net_device *dev;
+
+ /* skb->dev passed in is the master dev for vrfs.
+ * Get the proper interface that does have a vif associated with it.
+ */
+ dev = skb->dev;
+ if (netif_is_l3_master(skb->dev)) {
+ dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
+ if (!dev) {
+ kfree_skb(skb);
+ return -ENODEV;
+ }
+ }
err = ip6mr_fib_lookup(net, &fl6, &mrt);
if (err < 0) {
@@ -2165,7 +2189,7 @@ int ip6_mr_input(struct sk_buff *skb)
cache = ip6mr_cache_find(mrt,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
if (!cache) {
- int vif = ip6mr_find_vif(mrt, skb->dev);
+ int vif = ip6mr_find_vif(mrt, dev);
if (vif >= 0)
cache = ip6mr_cache_find_any(mrt,
@@ -2179,9 +2203,9 @@ int ip6_mr_input(struct sk_buff *skb)
if (!cache) {
int vif;
- vif = ip6mr_find_vif(mrt, skb->dev);
+ vif = ip6mr_find_vif(mrt, dev);
if (vif >= 0) {
- int err = ip6mr_cache_unresolved(mrt, vif, skb);
+ int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
read_unlock(&mrt_lock);
return err;
@@ -2191,7 +2215,7 @@ int ip6_mr_input(struct sk_buff *skb)
return -ENODEV;
}
- ip6_mr_forward(net, mrt, skb, cache);
+ ip6_mr_forward(net, mrt, dev, skb, cache);
read_unlock(&mrt_lock);
@@ -2257,7 +2281,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
iph->saddr = rt->rt6i_src.addr;
iph->daddr = rt->rt6i_dst.addr;
- err = ip6mr_cache_unresolved(mrt, vif, skb2);
+ err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
read_unlock(&mrt_lock);
return err;
@@ -2433,6 +2457,15 @@ errout:
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
+
+ if (cb->strict_check) {
+ int err = ip_valid_fib_dump_req(nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
_ip6mr_fill_mroute, &mfc_unres_lock);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c0cac9cc3a28..381ce38940ae 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -674,6 +674,13 @@ done:
retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
break;
}
+ case IPV6_MULTICAST_ALL:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->mc_all = valbool;
+ retv = 0;
+ break;
+
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
{
@@ -1266,6 +1273,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->mcast_oif;
break;
+ case IPV6_MULTICAST_ALL:
+ val = np->mc_all;
+ break;
+
case IPV6_UNICAST_IF:
val = (__force int)htonl((__u32) np->ucast_oif);
break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 4ae54aaca373..6895e1dc0b03 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -636,7 +636,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
}
if (!mc) {
rcu_read_unlock();
- return true;
+ return np->mc_all;
}
read_lock(&mc->sflock);
psl = mc->sflist;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0ec273997d1d..a25cfdd47c89 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1533,7 +1533,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
if (!ndopts.nd_opts_rh) {
ip6_redirect_no_header(skb, dev_net(skb->dev),
- skb->dev->ifindex, 0);
+ skb->dev->ifindex);
return;
}
@@ -1784,6 +1784,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
change_info = ptr;
if (change_info->flags_changed & IFF_NOARP)
neigh_changeaddr(&nd_tbl, dev);
+ if (!netif_carrier_ok(dev))
+ neigh_carrier_down(&nd_tbl, dev);
break;
case NETDEV_DOWN:
neigh_ifdown(&nd_tbl, dev);
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 8b147440fbdc..af737b47b9b5 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -65,7 +65,10 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
}
hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
- BUG_ON(hp == NULL);
+ if (!hp) {
+ par->hotdrop = true;
+ return false;
+ }
/* Calculate the header length */
if (nexthdr == NEXTHDR_FRAGMENT)
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 2c99b94eeca3..21bf6bf04323 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -137,7 +137,10 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
sizeof(_addr),
&_addr);
- BUG_ON(ap == NULL);
+ if (ap == NULL) {
+ par->hotdrop = true;
+ return false;
+ }
if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
pr_debug("i=%d temp=%d;\n", i, temp);
@@ -166,7 +169,10 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+ temp * sizeof(_addr),
sizeof(_addr),
&_addr);
- BUG_ON(ap == NULL);
+ if (ap == NULL) {
+ par->hotdrop = true;
+ return false;
+ }
if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
break;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 8f68a518d9db..b8ac369f98ad 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -450,7 +450,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
sub_frag_mem_limit(fq->q.net, head->truesize);
head->ignore_df = 1;
- head->next = NULL;
+ skb_mark_not_on_list(head);
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index e6eb7cf9b54f..3e4bf2286abe 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -87,18 +87,30 @@ static struct notifier_block masq_dev_notifier = {
struct masq_dev_work {
struct work_struct work;
struct net *net;
+ struct in6_addr addr;
int ifindex;
};
+/* Match callback handed to nf_ct_iterate_cleanup_net(): @work is the
+ * struct masq_dev_work being processed. An entry matches when device_cmp()
+ * accepts it for the work item's ifindex and its reply-direction
+ * destination address equals the address stored in the work item.
+ * Returns non-zero on a match, 0 otherwise.
+ */
+static int inet_cmp(struct nf_conn *ct, void *work)
+{
+	struct masq_dev_work *mw = work;
+	long ifindex = mw->ifindex;
+
+	if (device_cmp(ct, (void *)ifindex))
+		return ipv6_addr_equal(&mw->addr,
+				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6);
+
+	return 0;
+}
+
+
static void iterate_cleanup_work(struct work_struct *work)
{
struct masq_dev_work *w;
- long index;
w = container_of(work, struct masq_dev_work, work);
- index = w->ifindex;
- nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0);
+ nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0);
put_net(w->net);
kfree(w);
@@ -147,6 +159,7 @@ static int masq_inet_event(struct notifier_block *this,
INIT_WORK(&w->work, iterate_cleanup_work);
w->ifindex = dev->ifindex;
w->net = net;
+ w->addr = ifa->addr;
schedule_work(&w->work);
return NOTIFY_DONE;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5c5b4f79296e..5c3c92713096 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -145,7 +145,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
*/
if (end < fq->q.len ||
((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
- goto err;
+ goto discard_fq;
fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end;
} else {
@@ -162,20 +162,20 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
if (fq->q.flags & INET_FRAG_LAST_IN)
- goto err;
+ goto discard_fq;
fq->q.len = end;
}
}
if (end == offset)
- goto err;
+ goto discard_fq;
/* Point into the IP datagram 'data' part. */
if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
- goto err;
+ goto discard_fq;
if (pskb_trim_rcsum(skb, end - offset))
- goto err;
+ goto discard_fq;
/* Find out which fragments are in front and at the back of us
* in the chain of fragments so far. We must know where to put
@@ -388,7 +388,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
}
sub_frag_mem_limit(fq->q.net, sum_truesize);
- head->next = NULL;
+ skb_mark_not_on_list(head);
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
@@ -418,6 +418,7 @@ out_fail:
rcu_read_lock();
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
rcu_read_unlock();
+ inet_frag_kill(&fq->q);
return -1;
}
@@ -553,7 +554,6 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
table[0].data = &net->ipv6.frags.high_thresh;
table[0].extra1 = &net->ipv6.frags.low_thresh;
- table[0].extra2 = &init_net.ipv6.frags.high_thresh;
table[1].data = &net->ipv6.frags.low_thresh;
table[1].extra2 = &net->ipv6.frags.high_thresh;
table[2].data = &net->ipv6.frags.timeout;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a366c05a239d..f4e08b0689a8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -364,14 +364,11 @@ EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
- struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rt6_info *rt = (struct rt6_info *)dst;
struct fib6_info *from;
struct inet6_dev *idev;
- if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
- kfree(p);
-
+ ip_dst_metrics_put(dst);
rt6_uncached_list_del(rt);
idev = rt->rt6i_idev;
@@ -978,11 +975,7 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
rt->rt6i_flags &= ~RTF_EXPIRES;
rcu_assign_pointer(rt->from, from);
- dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
- if (from->fib6_metrics != &dst_default_metrics) {
- rt->dst._metrics |= DST_METRICS_REFCOUNTED;
- refcount_inc(&from->fib6_metrics->refcnt);
- }
+ ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}
/* Caller must already hold reference to @ort */
@@ -1000,7 +993,6 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
#ifdef CONFIG_IPV6_SUBTREES
rt->rt6i_src = ort->fib6_src;
#endif
- rt->rt6i_prefsrc = ort->fib6_prefsrc;
}
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
@@ -1454,11 +1446,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,
if (ort->fib6_src.plen)
src_key = &nrt->rt6i_src.addr;
#endif
-
- /* Update rt6i_prefsrc as it could be changed
- * in rt6_remove_prefsrc()
- */
- nrt->rt6i_prefsrc = ort->fib6_prefsrc;
/* rt6_mtu_change() might lower mtu on ort.
* Only insert this exception route if its mtu
* is less than ort's mtu value.
@@ -1640,25 +1627,6 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
rcu_read_unlock();
}
-static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
-{
- struct rt6_exception_bucket *bucket;
- struct rt6_exception *rt6_ex;
- int i;
-
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
- if (bucket) {
- for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
- hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
- rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
- }
- bucket++;
- }
- }
-}
-
static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
struct rt6_info *rt, int mtu)
{
@@ -2103,7 +2071,8 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
{
bool any_src;
- if (rt6_need_strict(&fl6->daddr)) {
+ if (ipv6_addr_type(&fl6->daddr) &
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
struct dst_entry *dst;
dst = l3mdev_link_scope_lookup(net, fl6);
@@ -2373,15 +2342,14 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
{
const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
- fl6.daddr = iph->daddr;
- fl6.saddr = iph->saddr;
- fl6.flowlabel = ip6_flowinfo(iph);
- fl6.flowi6_uid = uid;
+ struct flowi6 fl6 = {
+ .flowi6_oif = oif,
+ .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
+ .flowi6_uid = uid,
+ };
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
@@ -2532,16 +2500,15 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
{
const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_iif = LOOPBACK_IFINDEX;
- fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark;
- fl6.daddr = iph->daddr;
- fl6.saddr = iph->saddr;
- fl6.flowlabel = ip6_flowinfo(iph);
- fl6.flowi6_uid = uid;
+ struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_oif = oif,
+ .flowi6_mark = mark,
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
+ .flowi6_uid = uid,
+ };
dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
rt6_do_redirect(dst, NULL, skb);
@@ -2549,21 +2516,18 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
}
EXPORT_SYMBOL_GPL(ip6_redirect);
-void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
- u32 mark)
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_iif = LOOPBACK_IFINDEX;
- fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark;
- fl6.daddr = msg->dest;
- fl6.saddr = iph->daddr;
- fl6.flowi6_uid = sock_net_uid(net, NULL);
+ struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_oif = oif,
+ .daddr = msg->dest,
+ .saddr = iph->daddr,
+ .flowi6_uid = sock_net_uid(net, NULL),
+ };
dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
rt6_do_redirect(dst, NULL, skb);
@@ -2734,24 +2698,6 @@ out:
return entries > rt_max_size;
}
-static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
- struct fib6_config *cfg)
-{
- struct dst_metrics *p;
-
- if (!cfg->fc_mx)
- return 0;
-
- p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
- if (unlikely(!p))
- return -ENOMEM;
-
- refcount_set(&p->refcnt, 1);
- rt->fib6_metrics = p;
-
- return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
-}
-
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
struct fib6_config *cfg,
const struct in6_addr *gw_addr,
@@ -3027,13 +2973,17 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!rt)
goto out;
+ rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len);
+ if (IS_ERR(rt->fib6_metrics)) {
+ err = PTR_ERR(rt->fib6_metrics);
+ /* Do not leave garbage there. */
+ rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
+ goto out;
+ }
+
if (cfg->fc_flags & RTF_ADDRCONF)
rt->dst_nocount = true;
- err = ip6_convert_metrics(net, rt, cfg);
- if (err < 0)
- goto out;
-
if (cfg->fc_flags & RTF_EXPIRES)
fib6_set_expires(rt, jiffies +
clock_t_to_jiffies(cfg->fc_expires));
@@ -3142,8 +3092,6 @@ install_route:
rt->fib6_nh.nh_dev = dev;
rt->fib6_table = table;
- cfg->fc_nlinfo.nl_net = dev_net(dev);
-
if (idev)
in6_dev_put(idev);
@@ -3635,23 +3583,23 @@ static void rtmsg_to_fib6_config(struct net *net,
struct in6_rtmsg *rtmsg,
struct fib6_config *cfg)
{
- memset(cfg, 0, sizeof(*cfg));
+ *cfg = (struct fib6_config){
+ .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
+ : RT6_TABLE_MAIN,
+ .fc_ifindex = rtmsg->rtmsg_ifindex,
+ .fc_metric = rtmsg->rtmsg_metric,
+ .fc_expires = rtmsg->rtmsg_info,
+ .fc_dst_len = rtmsg->rtmsg_dst_len,
+ .fc_src_len = rtmsg->rtmsg_src_len,
+ .fc_flags = rtmsg->rtmsg_flags,
+ .fc_type = rtmsg->rtmsg_type,
- cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
- : RT6_TABLE_MAIN;
- cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
- cfg->fc_metric = rtmsg->rtmsg_metric;
- cfg->fc_expires = rtmsg->rtmsg_info;
- cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
- cfg->fc_src_len = rtmsg->rtmsg_src_len;
- cfg->fc_flags = rtmsg->rtmsg_flags;
- cfg->fc_type = rtmsg->rtmsg_type;
-
- cfg->fc_nlinfo.nl_net = net;
+ .fc_nlinfo.nl_net = net,
- cfg->fc_dst = rtmsg->rtmsg_dst;
- cfg->fc_src = rtmsg->rtmsg_src;
- cfg->fc_gateway = rtmsg->rtmsg_gateway;
+ .fc_dst = rtmsg->rtmsg_dst,
+ .fc_src = rtmsg->rtmsg_src,
+ .fc_gateway = rtmsg->rtmsg_gateway,
+ };
}
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
@@ -3758,6 +3706,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
if (!f6i)
return ERR_PTR(-ENOMEM);
+ f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0);
f6i->dst_nocount = true;
f6i->dst_host = true;
f6i->fib6_protocol = RTPROT_KERNEL;
@@ -3800,8 +3749,6 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
rt->fib6_prefsrc.plen = 0;
- /* need to update cache as well */
- rt6_exceptions_remove_prefsrc(rt);
spin_unlock_bh(&rt6_exception_lock);
}
return 0;
@@ -4079,8 +4026,12 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
.event = event,
},
};
+ struct net *net = dev_net(dev);
- fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
+ if (net->ipv6.sysctl.skip_notify_on_dev_down)
+ fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
+ else
+ fib6_clean_all(net, fib6_ifdown, &arg);
}
void rt6_disable_ip(struct net_device *dev, unsigned long event)
@@ -4170,20 +4121,25 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
- NULL);
+ extack);
if (err < 0)
goto errout;
err = -EINVAL;
rtm = nlmsg_data(nlh);
- memset(cfg, 0, sizeof(*cfg));
- cfg->fc_table = rtm->rtm_table;
- cfg->fc_dst_len = rtm->rtm_dst_len;
- cfg->fc_src_len = rtm->rtm_src_len;
- cfg->fc_flags = RTF_UP;
- cfg->fc_protocol = rtm->rtm_protocol;
- cfg->fc_type = rtm->rtm_type;
+ *cfg = (struct fib6_config){
+ .fc_table = rtm->rtm_table,
+ .fc_dst_len = rtm->rtm_dst_len,
+ .fc_src_len = rtm->rtm_src_len,
+ .fc_flags = RTF_UP,
+ .fc_protocol = rtm->rtm_protocol,
+ .fc_type = rtm->rtm_type,
+
+ .fc_nlinfo.portid = NETLINK_CB(skb).portid,
+ .fc_nlinfo.nlh = nlh,
+ .fc_nlinfo.nl_net = sock_net(skb->sk),
+ };
if (rtm->rtm_type == RTN_UNREACHABLE ||
rtm->rtm_type == RTN_BLACKHOLE ||
@@ -4199,10 +4155,6 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
- cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
- cfg->fc_nlinfo.nlh = nlh;
- cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
-
if (tb[RTA_GATEWAY]) {
cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
cfg->fc_flags |= RTF_GATEWAY;
@@ -4850,7 +4802,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct rt6_info *rt;
struct sk_buff *skb;
struct rtmsg *rtm;
- struct flowi6 fl6;
+ struct flowi6 fl6 = {};
bool fibmatch;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
@@ -4859,7 +4811,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
err = -EINVAL;
- memset(&fl6, 0, sizeof(fl6));
rtm = nlmsg_data(nlh);
fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
@@ -5084,7 +5035,10 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
return 0;
}
-struct ctl_table ipv6_route_table_template[] = {
+static int zero;
+static int one = 1;
+
+static struct ctl_table ipv6_route_table_template[] = {
{
.procname = "flush",
.data = &init_net.ipv6.sysctl.flush_delay,
@@ -5155,6 +5109,15 @@ struct ctl_table ipv6_route_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
+ {
+ .procname = "skip_notify_on_dev_down",
+ .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ /* proc_dointvec() ignores extra1/extra2; the minmax handler is
+  * required for the 0..1 bounds below to actually be enforced.
+  */
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
@@ -5178,6 +5141,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+ table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
@@ -5242,6 +5206,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+ net->ipv6.sysctl.skip_notify_on_dev_down = 0;
net->ipv6.ip6_rt_gc_expire = 30*HZ;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e9400ffa7875..51c9f75f34b9 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -534,13 +534,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, iph->protocol, 0);
+ t->parms.link, iph->protocol);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- iph->protocol, 0);
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link,
+ iph->protocol);
err = 0;
goto out;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 28c4aa5078fc..374e7d302f26 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -548,7 +548,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
}
-static DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
static_branch_enable(&udpv6_encap_needed_key);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 95dee9ca8d22..1b8e161ac527 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -119,7 +119,7 @@ static struct sk_buff *udp6_gro_receive(struct list_head *head,
{
struct udphdr *uh = udp_gro_udphdr(skb);
- if (unlikely(!uh))
+ if (unlikely(!uh) || !static_branch_unlikely(&udpv6_encap_needed_key))
goto flush;
/* Don't bother verifying checksum if we're going to flush anyway. */
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index e2f16a0173a9..45115c125569 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -48,7 +48,7 @@ static struct iucv_interface *pr_iucv;
static const u8 iprm_shutdown[8] =
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
-#define TRGCLS_SIZE (sizeof(((struct iucv_message *)0)->class))
+#define TRGCLS_SIZE FIELD_SIZEOF(struct iucv_message, class)
#define __iucv_sock_wait(sk, condition, timeo, ret) \
do { \
@@ -320,13 +320,9 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
struct sk_buff *nskb;
int err, confirm_recv = 0;
- memset(skb->head, 0, ETH_HLEN);
- phs_hdr = skb_push(skb, sizeof(struct af_iucv_trans_hdr));
- skb_reset_mac_header(skb);
+ phs_hdr = skb_push(skb, sizeof(*phs_hdr));
+ memset(phs_hdr, 0, sizeof(*phs_hdr));
skb_reset_network_header(skb);
- skb_push(skb, ETH_HLEN);
- skb_reset_mac_header(skb);
- memset(phs_hdr, 0, sizeof(struct af_iucv_trans_hdr));
phs_hdr->magic = ETH_P_AF_IUCV;
phs_hdr->version = 1;
@@ -350,6 +346,9 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
if (imsg)
memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message));
+ skb_push(skb, ETH_HLEN);
+ memset(skb->data, 0, ETH_HLEN);
+
skb->dev = iucv->hs_dev;
if (!skb->dev) {
err = -ENODEV;
@@ -1943,8 +1942,7 @@ static void iucv_callback_shutdown(struct iucv_path *path, u8 ipuser[16])
/***************** HiperSockets transport callbacks ********************/
static void afiucv_swap_src_dest(struct sk_buff *skb)
{
- struct af_iucv_trans_hdr *trans_hdr =
- (struct af_iucv_trans_hdr *)skb->data;
+ struct af_iucv_trans_hdr *trans_hdr = iucv_trans_hdr(skb);
char tmpID[8];
char tmpName[8];
@@ -1967,13 +1965,12 @@ static void afiucv_swap_src_dest(struct sk_buff *skb)
**/
static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
{
+ struct af_iucv_trans_hdr *trans_hdr = iucv_trans_hdr(skb);
struct sock *nsk;
struct iucv_sock *iucv, *niucv;
- struct af_iucv_trans_hdr *trans_hdr;
int err;
iucv = iucv_sk(sk);
- trans_hdr = (struct af_iucv_trans_hdr *)skb->data;
if (!iucv) {
/* no sock - connection refused */
afiucv_swap_src_dest(skb);
@@ -2034,15 +2031,13 @@ out:
static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
{
struct iucv_sock *iucv = iucv_sk(sk);
- struct af_iucv_trans_hdr *trans_hdr =
- (struct af_iucv_trans_hdr *)skb->data;
if (!iucv)
goto out;
if (sk->sk_state != IUCV_BOUND)
goto out;
bh_lock_sock(sk);
- iucv->msglimit_peer = trans_hdr->window;
+ iucv->msglimit_peer = iucv_trans_hdr(skb)->window;
sk->sk_state = IUCV_CONNECTED;
sk->sk_state_change(sk);
bh_unlock_sock(sk);
@@ -2098,8 +2093,6 @@ out:
static int afiucv_hs_callback_win(struct sock *sk, struct sk_buff *skb)
{
struct iucv_sock *iucv = iucv_sk(sk);
- struct af_iucv_trans_hdr *trans_hdr =
- (struct af_iucv_trans_hdr *)skb->data;
if (!iucv)
return NET_RX_SUCCESS;
@@ -2107,7 +2100,7 @@ static int afiucv_hs_callback_win(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state != IUCV_CONNECTED)
return NET_RX_SUCCESS;
- atomic_sub(trans_hdr->window, &iucv->msg_sent);
+ atomic_sub(iucv_trans_hdr(skb)->window, &iucv->msg_sent);
iucv_sock_wake_msglim(sk);
return NET_RX_SUCCESS;
}
@@ -2170,22 +2163,13 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
int err = NET_RX_SUCCESS;
char nullstring[8];
- if (skb->len < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr))) {
- WARN_ONCE(1, "AF_IUCV too short skb, len=%d, min=%d",
- (int)skb->len,
- (int)(ETH_HLEN + sizeof(struct af_iucv_trans_hdr)));
+ if (!pskb_may_pull(skb, sizeof(*trans_hdr))) {
+ WARN_ONCE(1, "AF_IUCV failed to receive skb, len=%u", skb->len);
kfree_skb(skb);
return NET_RX_SUCCESS;
}
- if (skb_headlen(skb) < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr)))
- if (skb_linearize(skb)) {
- WARN_ONCE(1, "AF_IUCV skb_linearize failed, len=%d",
- (int)skb->len);
- kfree_skb(skb);
- return NET_RX_SUCCESS;
- }
- skb_pull(skb, ETH_HLEN);
- trans_hdr = (struct af_iucv_trans_hdr *)skb->data;
+
+ trans_hdr = iucv_trans_hdr(skb);
EBCASC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName));
EBCASC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID));
EBCASC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName));
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 260b3dc1b4a2..64d4bef04e73 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -127,9 +127,7 @@ void llc_sap_close(struct llc_sap *sap)
list_del_rcu(&sap->node);
spin_unlock_bh(&llc_sap_list_lock);
- synchronize_rcu();
-
- kfree(sap);
+ kfree_rcu(sap, rcu);
}
static struct packet_type llc_packet_type __read_mostly = {
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 76e30f4797fb..f869e35d0974 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -27,20 +27,6 @@ config MAC80211_RC_MINSTREL
---help---
This option enables the 'minstrel' TX rate control algorithm
-config MAC80211_RC_MINSTREL_HT
- bool "Minstrel 802.11n support" if EXPERT
- depends on MAC80211_RC_MINSTREL
- default y
- ---help---
- This option enables the 'minstrel_ht' TX rate control algorithm
-
-config MAC80211_RC_MINSTREL_VHT
- bool "Minstrel 802.11ac support" if EXPERT
- depends on MAC80211_RC_MINSTREL_HT
- default n
- ---help---
- This option enables VHT in the 'minstrel_ht' TX rate control algorithm
-
choice
prompt "Default rate control algorithm"
depends on MAC80211_HAS_RC
@@ -62,8 +48,7 @@ endchoice
config MAC80211_RC_DEFAULT
string
- default "minstrel_ht" if MAC80211_RC_DEFAULT_MINSTREL && MAC80211_RC_MINSTREL_HT
- default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL
+ default "minstrel_ht" if MAC80211_RC_DEFAULT_MINSTREL
default ""
endif
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index bb707789ef2b..4f03ebe732fa 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -53,13 +53,14 @@ mac80211-$(CONFIG_PM) += pm.o
CFLAGS_trace.o := -I$(src)
-rc80211_minstrel-y := rc80211_minstrel.o
-rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o
+rc80211_minstrel-y := \
+ rc80211_minstrel.o \
+ rc80211_minstrel_ht.o
-rc80211_minstrel_ht-y := rc80211_minstrel_ht.o
-rc80211_minstrel_ht-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_ht_debugfs.o
+rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \
+ rc80211_minstrel_debugfs.o \
+ rc80211_minstrel_ht_debugfs.o
mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
-mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y)
ccflags-y += -DDEBUG
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 5d22eda8a6b1..51622333d460 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -158,12 +158,10 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
if (ret)
return ret;
- if (type == NL80211_IFTYPE_AP_VLAN &&
- params && params->use_4addr == 0) {
+ if (type == NL80211_IFTYPE_AP_VLAN && params->use_4addr == 0) {
RCU_INIT_POINTER(sdata->u.vlan.sta, NULL);
ieee80211_check_fast_rx_iface(sdata);
- } else if (type == NL80211_IFTYPE_STATION &&
- params && params->use_4addr >= 0) {
+ } else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) {
sdata->u.mgd.use_4addr = params->use_4addr;
}
@@ -792,6 +790,48 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
return 0;
}
+/*
+ * Replace the FTM responder parameters stored in the interface's BSS
+ * configuration with a private copy of @lci and/or @civicloc.
+ *
+ * An element counts as present only when both its pointer and its length
+ * are non-zero; an element that is not present is neither sized nor copied.
+ *
+ * Returns 1 when neither element is present (the stored parameters are left
+ * untouched), -ENOMEM when the allocation fails, and 0 once the new
+ * parameters are installed and the previous allocation has been freed.
+ */
+static int ieee80211_set_ftm_responder_params(
+ struct ieee80211_sub_if_data *sdata,
+ const u8 *lci, size_t lci_len,
+ const u8 *civicloc, size_t civicloc_len)
+{
+ struct ieee80211_ftm_responder_params *new, *old;
+ struct ieee80211_bss_conf *bss_conf;
+ bool have_lci = lci && lci_len;
+ bool have_civicloc = civicloc && civicloc_len;
+ size_t len = 0;
+ u8 *pos;
+
+ if (!have_lci && !have_civicloc)
+ return 1;
+
+ /* Size the trailing buffer from exactly what will be copied below. */
+ if (have_lci)
+ len += lci_len;
+ if (have_civicloc)
+ len += civicloc_len;
+
+ bss_conf = &sdata->vif.bss_conf;
+ old = bss_conf->ftmr_params;
+
+ new = kzalloc(sizeof(*new) + len, GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+
+ /* The copied data lives directly behind the structure itself. */
+ pos = (u8 *)(new + 1);
+ if (have_lci) {
+ new->lci_len = lci_len;
+ new->lci = pos;
+ memcpy(pos, lci, lci_len);
+ pos += lci_len;
+ }
+
+ if (have_civicloc) {
+ new->civicloc_len = civicloc_len;
+ new->civicloc = pos;
+ memcpy(pos, civicloc, civicloc_len);
+ }
+
+ /* Publish the new parameters before releasing the old ones. */
+ bss_conf->ftmr_params = new;
+ kfree(old);
+
+ return 0;
+}
+
static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
struct cfg80211_beacon_data *params,
const struct ieee80211_csa_settings *csa)
@@ -865,6 +905,20 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
if (err == 0)
changed |= BSS_CHANGED_AP_PROBE_RESP;
+ if (params->ftm_responder != -1) {
+ sdata->vif.bss_conf.ftm_responder = params->ftm_responder;
+ err = ieee80211_set_ftm_responder_params(sdata,
+ params->lci,
+ params->lci_len,
+ params->civicloc,
+ params->civicloc_len);
+
+ if (err < 0)
+ return err;
+
+ changed |= BSS_CHANGED_FTM_RESPONDER;
+ }
+
rcu_assign_pointer(sdata->u.ap.beacon, new);
if (old)
@@ -911,6 +965,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
sdata->vif.bss_conf.beacon_int = params->beacon_interval;
+ if (params->he_cap)
+ sdata->vif.bss_conf.he_support = true;
+
mutex_lock(&local->mtx);
err = ieee80211_vif_use_channel(sdata, &params->chandef,
IEEE80211_CHANCTX_SHARED);
@@ -1062,6 +1119,9 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
kfree_rcu(old_probe_resp, rcu_head);
sdata->u.ap.driver_smps_mode = IEEE80211_SMPS_OFF;
+ kfree(sdata->vif.bss_conf.ftmr_params);
+ sdata->vif.bss_conf.ftmr_params = NULL;
+
__sta_info_flush(sdata, true);
ieee80211_free_keys(sdata, true);
@@ -1092,50 +1152,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
return 0;
}
-/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
-struct iapp_layer2_update {
- u8 da[ETH_ALEN]; /* broadcast */
- u8 sa[ETH_ALEN]; /* STA addr */
- __be16 len; /* 6 */
- u8 dsap; /* 0 */
- u8 ssap; /* 0 */
- u8 control;
- u8 xid_info[3];
-} __packed;
-
-static void ieee80211_send_layer2_update(struct sta_info *sta)
-{
- struct iapp_layer2_update *msg;
- struct sk_buff *skb;
-
- /* Send Level 2 Update Frame to update forwarding tables in layer 2
- * bridge devices */
-
- skb = dev_alloc_skb(sizeof(*msg));
- if (!skb)
- return;
- msg = skb_put(skb, sizeof(*msg));
-
- /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
- * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
-
- eth_broadcast_addr(msg->da);
- memcpy(msg->sa, sta->sta.addr, ETH_ALEN);
- msg->len = htons(6);
- msg->dsap = 0;
- msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */
- msg->control = 0xaf; /* XID response lsb.1111F101.
- * F=0 (no poll command; unsolicited frame) */
- msg->xid_info[0] = 0x81; /* XID format identifier */
- msg->xid_info[1] = 1; /* LLC types/classes: Type 1 LLC */
- msg->xid_info[2] = 0; /* XID sender's receive window size (RW) */
-
- skb->dev = sta->sdata->dev;
- skb->protocol = eth_type_trans(skb, sta->sdata->dev);
- memset(skb->cb, 0, sizeof(skb->cb));
- netif_rx_ni(skb);
-}
-
static int sta_apply_auth_flags(struct ieee80211_local *local,
struct sta_info *sta,
u32 mask, u32 set)
@@ -1499,7 +1515,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
}
if (layer2_update)
- ieee80211_send_layer2_update(sta);
+ cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr);
rcu_read_unlock();
@@ -1601,7 +1617,7 @@ static int ieee80211_change_station(struct wiphy *wiphy,
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
ieee80211_vif_inc_num_mcast(sta->sdata);
- ieee80211_send_layer2_update(sta);
+ cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr);
}
err = sta_apply_parameters(local, sta, params);
@@ -2918,6 +2934,20 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
memcpy(pos, beacon->probe_resp, beacon->probe_resp_len);
pos += beacon->probe_resp_len;
}
+ if (beacon->ftm_responder)
+ new_beacon->ftm_responder = beacon->ftm_responder;
+ if (beacon->lci) {
+ new_beacon->lci_len = beacon->lci_len;
+ new_beacon->lci = pos;
+ memcpy(pos, beacon->lci, beacon->lci_len);
+ pos += beacon->lci_len;
+ }
+ if (beacon->civicloc) {
+ new_beacon->civicloc_len = beacon->civicloc_len;
+ new_beacon->civicloc = pos;
+ memcpy(pos, beacon->civicloc, beacon->civicloc_len);
+ pos += beacon->civicloc_len;
+ }
return new_beacon;
}
@@ -3808,6 +3838,17 @@ out:
return ret;
}
+/*
+ * .get_ftm_responder_stats handler of mac80211_config_ops: resolve the
+ * mac80211 local and sub-interface data for @wiphy / @dev and forward the
+ * request, with @ftm_stats, to drv_get_ftm_responder_stats(), returning
+ * its result unchanged.
+ */
+static int
+ieee80211_get_ftm_responder_stats(struct wiphy *wiphy,
+ struct net_device *dev,
+ struct cfg80211_ftm_responder_stats *ftm_stats)
+{
+ return drv_get_ftm_responder_stats(wiphy_priv(wiphy),
+ IEEE80211_DEV_TO_SUB_IF(dev),
+ ftm_stats);
+}
+
const struct cfg80211_ops mac80211_config_ops = {
.add_virtual_intf = ieee80211_add_iface,
.del_virtual_intf = ieee80211_del_iface,
@@ -3902,4 +3943,5 @@ const struct cfg80211_ops mac80211_config_ops = {
.set_multicast_to_unicast = ieee80211_set_multicast_to_unicast,
.tx_control_port = ieee80211_tx_control_port,
.get_txq_stats = ieee80211_get_txq_stats,
+ .get_ftm_responder_stats = ieee80211_get_ftm_responder_stats,
};
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index b5adf3625d16..3fe541e358f3 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -3,6 +3,7 @@
*
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* GPLv2
*
@@ -214,6 +215,9 @@ static const char *hw_flag_names[] = {
FLAG(SUPPORTS_TDLS_BUFFER_STA),
FLAG(DEAUTH_NEED_MGD_TX_PREP),
FLAG(DOESNT_SUPPORT_QOS_NDP),
+ FLAG(BUFF_MMPDU_TXQ),
+ FLAG(SUPPORTS_VHT_EXT_NSS_BW),
+ FLAG(STA_MMPDU_TXQ),
#undef FLAG
};
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 4105081dc1df..af5185a836e5 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -4,6 +4,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -140,7 +141,7 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
{
struct sta_info *sta = file->private_data;
struct ieee80211_local *local = sta->local;
- size_t bufsz = AQM_TXQ_ENTRY_LEN*(IEEE80211_NUM_TIDS+1);
+ size_t bufsz = AQM_TXQ_ENTRY_LEN * (IEEE80211_NUM_TIDS + 2);
char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
struct txq_info *txqi;
ssize_t rv;
@@ -162,7 +163,9 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
bufsz+buf-p,
"tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");
- for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
+ for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
+ if (!sta->sta.txq[i])
+ continue;
txqi = to_txq_info(sta->sta.txq[i]);
p += scnprintf(p, bufsz+buf-p,
"%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
@@ -487,12 +490,368 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
p += scnprintf(p, sizeof(buf)+buf-p,
"MCS TX highest: %d Mbps\n",
le16_to_cpu(vhtc->vht_mcs.tx_highest));
+#undef PFLAG
}
return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
}
STA_OPS(vht_capa);
+static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ char *buf, *p;
+ size_t buf_sz = PAGE_SIZE;
+ struct sta_info *sta = file->private_data;
+ struct ieee80211_sta_he_cap *hec = &sta->sta.he_cap;
+ struct ieee80211_he_mcs_nss_supp *nss = &hec->he_mcs_nss_supp;
+ u8 ppe_size;
+ u8 *cap;
+ int i;
+ ssize_t ret;
+
+ buf = kmalloc(buf_sz, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = buf;
+
+ p += scnprintf(p, buf_sz + buf - p, "HE %ssupported\n",
+ hec->has_he ? "" : "not ");
+ if (!hec->has_he)
+ goto out;
+
+ cap = hec->he_cap_elem.mac_cap_info;
+ p += scnprintf(p, buf_sz + buf - p,
+ "MAC-CAP: %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x\n",
+ cap[0], cap[1], cap[2], cap[3], cap[4], cap[5]);
+
+#define PRINT(fmt, ...) \
+ p += scnprintf(p, buf_sz + buf - p, "\t\t" fmt "\n", \
+ ##__VA_ARGS__)
+
+#define PFLAG(t, n, a, b) \
+ do { \
+ if (cap[n] & IEEE80211_HE_##t##_CAP##n##_##a) \
+ PRINT("%s", b); \
+ } while (0)
+
+#define PFLAG_RANGE(t, i, n, s, m, off, fmt) \
+ do { \
+ u8 msk = IEEE80211_HE_##t##_CAP##i##_##n##_MASK; \
+ u8 idx = ((cap[i] & msk) >> (ffs(msk) - 1)) + off; \
+ PRINT(fmt, (s << idx) + (m * idx)); \
+ } while (0)
+
+#define PFLAG_RANGE_DEFAULT(t, i, n, s, m, off, fmt, a, b) \
+ do { \
+ if (cap[i] == IEEE80211_HE_##t ##_CAP##i##_##n##_##a) { \
+ PRINT("%s", b); \
+ break; \
+ } \
+ PFLAG_RANGE(t, i, n, s, m, off, fmt); \
+ } while (0)
+
+ PFLAG(MAC, 0, HTC_HE, "HTC-HE");
+ PFLAG(MAC, 0, TWT_REQ, "TWT-REQ");
+ PFLAG(MAC, 0, TWT_RES, "TWT-RES");
+ PFLAG_RANGE_DEFAULT(MAC, 0, DYNAMIC_FRAG, 0, 1, 0,
+ "DYNAMIC-FRAG-LEVEL-%d", NOT_SUPP, "NOT-SUPP");
+ PFLAG_RANGE_DEFAULT(MAC, 0, MAX_NUM_FRAG_MSDU, 1, 0, 0,
+ "MAX-NUM-FRAG-MSDU-%d", UNLIMITED, "UNLIMITED");
+
+ PFLAG_RANGE_DEFAULT(MAC, 1, MIN_FRAG_SIZE, 128, 0, -1,
+ "MIN-FRAG-SIZE-%d", UNLIMITED, "UNLIMITED");
+ PFLAG_RANGE_DEFAULT(MAC, 1, TF_MAC_PAD_DUR, 0, 8, 0,
+ "TF-MAC-PAD-DUR-%dUS", MASK, "UNKNOWN");
+ PFLAG_RANGE(MAC, 1, MULTI_TID_AGG_RX_QOS, 0, 1, 1,
+ "MULTI-TID-AGG-RX-QOS-%d");
+
+ if (cap[0] & IEEE80211_HE_MAC_CAP0_HTC_HE) {
+ switch (((cap[2] << 1) | (cap[1] >> 7)) & 0x3) {
+ case 0:
+ PRINT("LINK-ADAPTATION-NO-FEEDBACK");
+ break;
+ case 1:
+ PRINT("LINK-ADAPTATION-RESERVED");
+ break;
+ case 2:
+ PRINT("LINK-ADAPTATION-UNSOLICITED-FEEDBACK");
+ break;
+ case 3:
+ PRINT("LINK-ADAPTATION-BOTH");
+ break;
+ }
+ }
+
+ PFLAG(MAC, 2, ALL_ACK, "ALL-ACK");
+ PFLAG(MAC, 2, TRS, "TRS");
+ PFLAG(MAC, 2, BSR, "BSR");
+ PFLAG(MAC, 2, BCAST_TWT, "BCAST-TWT");
+ PFLAG(MAC, 2, 32BIT_BA_BITMAP, "32BIT-BA-BITMAP");
+ PFLAG(MAC, 2, MU_CASCADING, "MU-CASCADING");
+ PFLAG(MAC, 2, ACK_EN, "ACK-EN");
+
+ PFLAG(MAC, 3, OMI_CONTROL, "OMI-CONTROL");
+ PFLAG(MAC, 3, OFDMA_RA, "OFDMA-RA");
+
+ switch (cap[3] & IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK) {
+ case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_USE_VHT:
+ PRINT("MAX-AMPDU-LEN-EXP-USE-VHT");
+ break;
+ case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_1:
+ PRINT("MAX-AMPDU-LEN-EXP-VHT-1");
+ break;
+ case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2:
+ PRINT("MAX-AMPDU-LEN-EXP-VHT-2");
+ break;
+ case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED:
+ PRINT("MAX-AMPDU-LEN-EXP-RESERVED");
+ break;
+ }
+
+ PFLAG(MAC, 3, AMSDU_FRAG, "AMSDU-FRAG");
+ PFLAG(MAC, 3, FLEX_TWT_SCHED, "FLEX-TWT-SCHED");
+ PFLAG(MAC, 3, RX_CTRL_FRAME_TO_MULTIBSS, "RX-CTRL-FRAME-TO-MULTIBSS");
+
+ PFLAG(MAC, 4, BSRP_BQRP_A_MPDU_AGG, "BSRP-BQRP-A-MPDU-AGG");
+ PFLAG(MAC, 4, QTP, "QTP");
+ PFLAG(MAC, 4, BQR, "BQR");
+ PFLAG(MAC, 4, SRP_RESP, "SRP-RESP");
+ PFLAG(MAC, 4, NDP_FB_REP, "NDP-FB-REP");
+ PFLAG(MAC, 4, OPS, "OPS");
+ PFLAG(MAC, 4, AMDSU_IN_AMPDU, "AMSDU-IN-AMPDU");
+
+ PRINT("MULTI-TID-AGG-TX-QOS-%d", ((cap[5] << 1) | (cap[4] >> 7)) & 0x7);
+
+ PFLAG(MAC, 5, SUBCHAN_SELECVITE_TRANSMISSION,
+ "SUBCHAN-SELECVITE-TRANSMISSION");
+ PFLAG(MAC, 5, UL_2x996_TONE_RU, "UL-2x996-TONE-RU");
+ PFLAG(MAC, 5, OM_CTRL_UL_MU_DATA_DIS_RX, "OM-CTRL-UL-MU-DATA-DIS-RX");
+
+ cap = hec->he_cap_elem.phy_cap_info;
+ p += scnprintf(p, buf_sz + buf - p,
+ "PHY CAP: %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x\n",
+ cap[0], cap[1], cap[2], cap[3], cap[4], cap[5], cap[6],
+ cap[7], cap[8], cap[9], cap[10]);
+
+ PFLAG(PHY, 0, CHANNEL_WIDTH_SET_40MHZ_IN_2G,
+ "CHANNEL-WIDTH-SET-40MHZ-IN-2G");
+ PFLAG(PHY, 0, CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G,
+ "CHANNEL-WIDTH-SET-40MHZ-80MHZ-IN-5G");
+ PFLAG(PHY, 0, CHANNEL_WIDTH_SET_160MHZ_IN_5G,
+ "CHANNEL-WIDTH-SET-160MHZ-IN-5G");
+ PFLAG(PHY, 0, CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G,
+ "CHANNEL-WIDTH-SET-80PLUS80-MHZ-IN-5G");
+ PFLAG(PHY, 0, CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G,
+ "CHANNEL-WIDTH-SET-RU-MAPPING-IN-2G");
+ PFLAG(PHY, 0, CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G,
+ "CHANNEL-WIDTH-SET-RU-MAPPING-IN-5G");
+
+ switch (cap[1] & IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK) {
+ case IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_20MHZ:
+ PRINT("PREAMBLE-PUNC-RX-80MHZ-ONLY-SECOND-20MHZ");
+ break;
+ case IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_40MHZ:
+ PRINT("PREAMBLE-PUNC-RX-80MHZ-ONLY-SECOND-40MHZ");
+ break;
+ case IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_20MHZ:
+ PRINT("PREAMBLE-PUNC-RX-160MHZ-ONLY-SECOND-20MHZ");
+ break;
+ case IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_40MHZ:
+ PRINT("PREAMBLE-PUNC-RX-160MHZ-ONLY-SECOND-40MHZ");
+ break;
+ }
+
+ PFLAG(PHY, 1, DEVICE_CLASS_A,
+ "IEEE80211-HE-PHY-CAP1-DEVICE-CLASS-A");
+ PFLAG(PHY, 1, LDPC_CODING_IN_PAYLOAD,
+ "LDPC-CODING-IN-PAYLOAD");
+ PFLAG(PHY, 1, HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US,
+ "HY-CAP1-HE-LTF-AND-GI-FOR-HE-PPDUS-0-8US");
+ PRINT("MIDAMBLE-RX-MAX-NSTS-%d", ((cap[2] << 1) | (cap[1] >> 7)) & 0x3);
+
+ PFLAG(PHY, 2, NDP_4x_LTF_AND_3_2US, "NDP-4X-LTF-AND-3-2US");
+ PFLAG(PHY, 2, STBC_TX_UNDER_80MHZ, "STBC-TX-UNDER-80MHZ");
+ PFLAG(PHY, 2, STBC_RX_UNDER_80MHZ, "STBC-RX-UNDER-80MHZ");
+ PFLAG(PHY, 2, DOPPLER_TX, "DOPPLER-TX");
+ PFLAG(PHY, 2, DOPPLER_RX, "DOPPLER-RX");
+ PFLAG(PHY, 2, UL_MU_FULL_MU_MIMO, "UL-MU-FULL-MU-MIMO");
+ PFLAG(PHY, 2, UL_MU_PARTIAL_MU_MIMO, "UL-MU-PARTIAL-MU-MIMO");
+
+ switch (cap[3] & IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK) {
+ case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM:
+ PRINT("DCM-MAX-CONST-TX-NO-DCM");
+ break;
+ case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_BPSK:
+ PRINT("DCM-MAX-CONST-TX-BPSK");
+ break;
+ case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_QPSK:
+ PRINT("DCM-MAX-CONST-TX-QPSK");
+ break;
+ case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_16_QAM:
+ PRINT("DCM-MAX-CONST-TX-16-QAM");
+ break;
+ }
+
+ PFLAG(PHY, 3, DCM_MAX_TX_NSS_1, "DCM-MAX-TX-NSS-1");
+ PFLAG(PHY, 3, DCM_MAX_TX_NSS_2, "DCM-MAX-TX-NSS-2");
+
+ switch (cap[3] & IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK) {
+ case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_NO_DCM:
+ PRINT("DCM-MAX-CONST-RX-NO-DCM");
+ break;
+ case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_BPSK:
+ PRINT("DCM-MAX-CONST-RX-BPSK");
+ break;
+ case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_QPSK:
+ PRINT("DCM-MAX-CONST-RX-QPSK");
+ break;
+ case IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_16_QAM:
+ PRINT("DCM-MAX-CONST-RX-16-QAM");
+ break;
+ }
+
+ PFLAG(PHY, 3, DCM_MAX_RX_NSS_1, "DCM-MAX-RX-NSS-1");
+ PFLAG(PHY, 3, DCM_MAX_RX_NSS_2, "DCM-MAX-RX-NSS-2");
+ PFLAG(PHY, 3, RX_HE_MU_PPDU_FROM_NON_AP_STA,
+ "RX-HE-MU-PPDU-FROM-NON-AP-STA");
+ PFLAG(PHY, 3, SU_BEAMFORMER, "SU-BEAMFORMER");
+
+ PFLAG(PHY, 4, SU_BEAMFORMEE, "SU-BEAMFORMEE");
+ PFLAG(PHY, 4, MU_BEAMFORMER, "MU-BEAMFORMER");
+
+ PFLAG_RANGE(PHY, 4, BEAMFORMEE_MAX_STS_UNDER_80MHZ, 0, 1, 4,
+ "BEAMFORMEE-MAX-STS-UNDER-%d");
+ PFLAG_RANGE(PHY, 4, BEAMFORMEE_MAX_STS_ABOVE_80MHZ, 0, 1, 4,
+ "BEAMFORMEE-MAX-STS-ABOVE-%d");
+
+ PFLAG_RANGE(PHY, 5, BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ, 0, 1, 1,
+ "NUM-SND-DIM-UNDER-80MHZ-%d");
+ PFLAG_RANGE(PHY, 5, BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ, 0, 1, 1,
+ "NUM-SND-DIM-ABOVE-80MHZ-%d");
+ PFLAG(PHY, 5, NG16_SU_FEEDBACK, "NG16-SU-FEEDBACK");
+ PFLAG(PHY, 5, NG16_MU_FEEDBACK, "NG16-MU-FEEDBACK");
+
+ PFLAG(PHY, 6, CODEBOOK_SIZE_42_SU, "CODEBOOK-SIZE-42-SU");
+ PFLAG(PHY, 6, CODEBOOK_SIZE_75_MU, "CODEBOOK-SIZE-75-MU");
+ PFLAG(PHY, 6, TRIG_SU_BEAMFORMER_FB, "TRIG-SU-BEAMFORMER-FB");
+ PFLAG(PHY, 6, TRIG_MU_BEAMFORMER_FB, "TRIG-MU-BEAMFORMER-FB");
+ PFLAG(PHY, 6, TRIG_CQI_FB, "TRIG-CQI-FB");
+ PFLAG(PHY, 6, PARTIAL_BW_EXT_RANGE, "PARTIAL-BW-EXT-RANGE");
+ PFLAG(PHY, 6, PARTIAL_BANDWIDTH_DL_MUMIMO,
+ "PARTIAL-BANDWIDTH-DL-MUMIMO");
+ PFLAG(PHY, 6, PPE_THRESHOLD_PRESENT, "PPE-THRESHOLD-PRESENT");
+
+ PFLAG(PHY, 7, SRP_BASED_SR, "SRP-BASED-SR");
+ PFLAG(PHY, 7, POWER_BOOST_FACTOR_AR, "POWER-BOOST-FACTOR-AR");
+ PFLAG(PHY, 7, HE_SU_MU_PPDU_4XLTF_AND_08_US_GI,
+ "HE-SU-MU-PPDU-4XLTF-AND-08-US-GI");
+ PFLAG_RANGE(PHY, 7, MAX_NC, 0, 1, 1, "MAX-NC-%d");
+ PFLAG(PHY, 7, STBC_TX_ABOVE_80MHZ, "STBC-TX-ABOVE-80MHZ");
+ PFLAG(PHY, 7, STBC_RX_ABOVE_80MHZ, "STBC-RX-ABOVE-80MHZ");
+
+ PFLAG(PHY, 8, HE_ER_SU_PPDU_4XLTF_AND_08_US_GI,
+ "HE-ER-SU-PPDU-4XLTF-AND-08-US-GI");
+ PFLAG(PHY, 8, 20MHZ_IN_40MHZ_HE_PPDU_IN_2G,
+ "20MHZ-IN-40MHZ-HE-PPDU-IN-2G");
+ PFLAG(PHY, 8, 20MHZ_IN_160MHZ_HE_PPDU, "20MHZ-IN-160MHZ-HE-PPDU");
+ PFLAG(PHY, 8, 80MHZ_IN_160MHZ_HE_PPDU, "80MHZ-IN-160MHZ-HE-PPDU");
+ PFLAG(PHY, 8, HE_ER_SU_1XLTF_AND_08_US_GI,
+ "HE-ER-SU-1XLTF-AND-08-US-GI");
+ PFLAG(PHY, 8, MIDAMBLE_RX_TX_2X_AND_1XLTF,
+ "MIDAMBLE-RX-TX-2X-AND-1XLTF");
+
+ switch (cap[8] & IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_MASK) {
+ case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_20MHZ:
+ PRINT("DDCM-MAX-BW-20MHZ");
+ break;
+ case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_40MHZ:
+ PRINT("DCM-MAX-BW-40MHZ");
+ break;
+ case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_80MHZ:
+ PRINT("DCM-MAX-BW-80MHZ");
+ break;
+ case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_160_OR_80P80_MHZ:
+ PRINT("DCM-MAX-BW-160-OR-80P80-MHZ");
+ break;
+ }
+
+ PFLAG(PHY, 9, LONGER_THAN_16_SIGB_OFDM_SYM,
+ "LONGER-THAN-16-SIGB-OFDM-SYM");
+ PFLAG(PHY, 9, NON_TRIGGERED_CQI_FEEDBACK,
+ "NON-TRIGGERED-CQI-FEEDBACK");
+ PFLAG(PHY, 9, TX_1024_QAM_LESS_THAN_242_TONE_RU,
+ "TX-1024-QAM-LESS-THAN-242-TONE-RU");
+ PFLAG(PHY, 9, RX_1024_QAM_LESS_THAN_242_TONE_RU,
+ "RX-1024-QAM-LESS-THAN-242-TONE-RU");
+ PFLAG(PHY, 9, RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB,
+ "RX-FULL-BW-SU-USING-MU-WITH-COMP-SIGB");
+ PFLAG(PHY, 9, RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB,
+ "RX-FULL-BW-SU-USING-MU-WITH-NON-COMP-SIGB");
+
+#undef PFLAG_RANGE_DEFAULT
+#undef PFLAG_RANGE
+#undef PFLAG
+
+#define PRINT_NSS_SUPP(f, n) \
+ do { \
+ int i; \
+ u16 v = le16_to_cpu(nss->f); \
+ p += scnprintf(p, buf_sz + buf - p, n ": %#.4x\n", v); \
+ for (i = 0; i < 8; i += 2) { \
+ switch ((v >> i) & 0x3) { \
+ case 0: \
+ PRINT(n "-%d-SUPPORT-0-7", i / 2); \
+ break; \
+ case 1: \
+ PRINT(n "-%d-SUPPORT-0-9", i / 2); \
+ break; \
+ case 2: \
+ PRINT(n "-%d-SUPPORT-0-11", i / 2); \
+ break; \
+ case 3: \
+ PRINT(n "-%d-NOT-SUPPORTED", i / 2); \
+ break; \
+ } \
+ } \
+ } while (0)
+
+ PRINT_NSS_SUPP(rx_mcs_80, "RX-MCS-80");
+ PRINT_NSS_SUPP(tx_mcs_80, "TX-MCS-80");
+
+ if (cap[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) {
+ PRINT_NSS_SUPP(rx_mcs_160, "RX-MCS-160");
+ PRINT_NSS_SUPP(tx_mcs_160, "TX-MCS-160");
+ }
+
+ if (cap[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) {
+ PRINT_NSS_SUPP(rx_mcs_80p80, "RX-MCS-80P80");
+ PRINT_NSS_SUPP(tx_mcs_80p80, "TX-MCS-80P80");
+ }
+
+#undef PRINT_NSS_SUPP
+#undef PRINT
+
+ if (!(cap[6] & IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT))
+ goto out;
+
+ p += scnprintf(p, buf_sz + buf - p, "PPE-THRESHOLDS: %#.2x",
+ hec->ppe_thres[0]);
+
+ ppe_size = ieee80211_he_ppe_size(hec->ppe_thres[0], cap);
+ for (i = 1; i < ppe_size; i++) {
+ p += scnprintf(p, buf_sz + buf - p, " %#.2x",
+ hec->ppe_thres[i]);
+ }
+ p += scnprintf(p, buf_sz + buf - p, "\n");
+
+out:
+ ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+ kfree(buf);
+ return ret;
+}
+STA_OPS(he_capa);
#define DEBUGFS_ADD(name) \
debugfs_create_file(#name, 0400, \
@@ -538,6 +897,7 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
DEBUGFS_ADD(agg_status);
DEBUGFS_ADD(ht_capa);
DEBUGFS_ADD(vht_capa);
+ DEBUGFS_ADD(he_capa);
DEBUGFS_ADD_COUNTER(rx_duplicates, rx_stats.num_duplicates);
DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 8f6998091d26..0b1747a2313d 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1173,6 +1173,32 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local,
local->ops->wake_tx_queue(&local->hw, &txq->txq);
}
+/*
+ * Forward the A-MSDU aggregation query for @head and @skb to the driver's
+ * can_aggregate_in_amsdu op. Returns true when the driver does not
+ * provide that op, otherwise whatever the op returns.
+ */
+static inline int drv_can_aggregate_in_amsdu(struct ieee80211_local *local,
+					     struct sk_buff *head,
+					     struct sk_buff *skb)
+{
+	if (local->ops->can_aggregate_in_amsdu)
+		return local->ops->can_aggregate_in_amsdu(&local->hw, head,
+							  skb);
+
+	/* no driver callback registered */
+	return true;
+}
+
+/*
+ * Call the driver's get_ftm_responder_stats op for @sdata's vif, passing
+ * @ftm_stats through. Returns the op's result, or -EOPNOTSUPP when the
+ * driver does not provide the op. The trace event is emitted in either
+ * case.
+ */
+static inline int
+drv_get_ftm_responder_stats(struct ieee80211_local *local,
+			    struct ieee80211_sub_if_data *sdata,
+			    struct cfg80211_ftm_responder_stats *ftm_stats)
+{
+	/* signed, so it matches the return type and can hold a negative errno */
+	int ret = -EOPNOTSUPP;
+
+	if (local->ops->get_ftm_responder_stats)
+		ret = local->ops->get_ftm_responder_stats(&local->hw,
+							 &sdata->vif,
+							 ftm_stats);
+	trace_drv_get_ftm_responder_stats(local, sdata, ftm_stats);
+
+	return ret;
+}
+
static inline int drv_start_nan(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct cfg80211_nan_conf *conf)
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index f0f5fedb8caa..0d704e8d7078 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1070,7 +1070,9 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
struct ieee80211_vht_cap cap_ie;
struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap;
- ieee80211_chandef_vht_oper(elems->vht_operation,
+ ieee80211_chandef_vht_oper(&local->hw,
+ elems->vht_operation,
+ elems->ht_operation,
&chandef);
memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie));
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 172aeae21ae9..10a05062e4a0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -377,6 +377,7 @@ struct ieee80211_mgd_auth_data {
u8 key[WLAN_KEY_LEN_WEP104];
u8 key_len, key_idx;
bool done;
+ bool peer_confirmed;
bool timeout_started;
u16 sae_trans, sae_status;
@@ -818,6 +819,7 @@ enum txq_info_flags {
IEEE80211_TXQ_STOP,
IEEE80211_TXQ_AMPDU,
IEEE80211_TXQ_NO_AMSDU,
+ IEEE80211_TXQ_STOP_NETIF_TX,
};
/**
@@ -1198,6 +1200,9 @@ struct ieee80211_local {
/* number of RX chains the hardware has */
u8 rx_chains;
+ /* bitmap of which sbands were copied */
+ u8 sband_allocated;
+
int tx_headroom; /* required headroom for hardware/radiotap */
/* Tasklet and skb queue to process calls from IRQ mode. All frames
@@ -1226,6 +1231,7 @@ struct ieee80211_local {
struct sk_buff_head pending[IEEE80211_MAX_QUEUES];
struct tasklet_struct tx_pending_tasklet;
+ struct tasklet_struct wake_txqs_tasklet;
atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES];
@@ -2038,6 +2044,7 @@ void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats,
struct txq_info *txqi);
+void ieee80211_wake_txqs(unsigned long data);
void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
u16 transaction, u16 auth_alg, u16 status,
const u8 *extra, size_t extra_len, const u8 *bssid,
@@ -2106,7 +2113,9 @@ u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo);
/* channel management */
bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
struct cfg80211_chan_def *chandef);
-bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
+bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
+ const struct ieee80211_vht_operation *oper,
+ const struct ieee80211_ht_operation *htop,
struct cfg80211_chan_def *chandef);
u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index c054ac85793c..4700718e010f 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -248,6 +248,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
(key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
increment_tailroom_need_count(sdata);
+ key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
ret = drv_set_key(key->local, DISABLE_KEY, sdata,
sta ? &sta->sta : NULL, &key->conf);
@@ -256,8 +257,65 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
"failed to remove key (%d, %pM) from hardware (%d)\n",
key->conf.keyidx,
sta ? sta->sta.addr : bcast_addr, ret);
+}
- key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
+/*
+ * Replace @old_key with @new_key (which may be NULL) as far as hardware
+ * crypto is concerned.
+ *
+ * Returns 0 without doing anything when @old_key is not flagged
+ * KEY_FLAG_UPLOADED_TO_HARDWARE. Otherwise hardware acceleration is
+ * disabled for @old_key and, when @new_key is given, the result of
+ * ieee80211_key_enable_hw_accel(@new_key) is returned (0 when it is NULL).
+ *
+ * When @ptk0rekey is set and both @new_key and the old key's station
+ * exist, the old key is first marked KEY_FLAG_TAINTED and fast-xmit is
+ * cleared; on hardware with AMPDU_AGGREGATION block-ack sessions are
+ * blocked and torn down; and drivers lacking
+ * NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 get a rate-limited warning plus a
+ * queue flush.
+ */
+static int ieee80211_hw_key_replace(struct ieee80211_key *old_key,
+ struct ieee80211_key *new_key,
+ bool ptk0rekey)
+{
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_local *local;
+ struct sta_info *sta;
+ int ret;
+
+ /* Aggregation sessions are OK when running on SW crypto.
+ * A broken remote STA may cause issues not observed with HW
+ * crypto, though.
+ */
+ if (!(old_key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+ return 0;
+
+ assert_key_lock(old_key->local);
+ sta = old_key->sta;
+
+ /* PTK only using key ID 0 needs special handling on rekey */
+ if (new_key && sta && ptk0rekey) {
+ local = old_key->local;
+ sdata = old_key->sdata;
+
+ /* Stop TX till we are on the new key */
+ old_key->flags |= KEY_FLAG_TAINTED;
+ ieee80211_clear_fast_xmit(sta);
+
+ /* Aggregation sessions during rekey are complicated due to the
+ * reorder buffer and retransmits. Side step that by blocking
+ * aggregation during rekey and tear down running sessions.
+ */
+ if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION)) {
+ set_sta_flag(sta, WLAN_STA_BLOCK_BA);
+ ieee80211_sta_tear_down_BA_sessions(sta,
+ AGG_STOP_LOCAL_REQUEST);
+ }
+
+ if (!wiphy_ext_feature_isset(local->hw.wiphy,
+ NL80211_EXT_FEATURE_CAN_REPLACE_PTK0)) {
+ pr_warn_ratelimited("Rekeying PTK for STA %pM but driver can't safely do that.",
+ sta->sta.addr);
+ /* Flushing the driver queues *may* help prevent
+ * the clear text leaks and freezes.
+ */
+ ieee80211_flush_queues(local, sdata, false);
+ }
+ }
+
+ /* the old key is disabled in hardware before the new one is enabled */
+ ieee80211_key_disable_hw_accel(old_key);
+
+ if (new_key)
+ ret = ieee80211_key_enable_hw_accel(new_key);
+ else
+ ret = 0;
+
+ return ret;
 }
static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
@@ -316,38 +374,57 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
}
-static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
+static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
bool pairwise,
struct ieee80211_key *old,
struct ieee80211_key *new)
{
int idx;
+ int ret;
bool defunikey, defmultikey, defmgmtkey;
/* caller must provide at least one old/new */
if (WARN_ON(!new && !old))
- return;
+ return 0;
if (new)
list_add_tail_rcu(&new->list, &sdata->key_list);
WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx);
- if (old)
+ if (old) {
idx = old->conf.keyidx;
- else
+ /* TODO: properly implement and test "Extended Key ID for
+ * Individually Addressed Frames" from IEEE 802.11-2016.
+ * Till then always assume only key ID 0 is used for
+ * pairwise keys.*/
+ ret = ieee80211_hw_key_replace(old, new, pairwise);
+ } else {
+ /* new must be provided in case old is not */
idx = new->conf.keyidx;
+ if (!new->local->wowlan)
+ ret = ieee80211_key_enable_hw_accel(new);
+ else
+ ret = 0;
+ }
+
+ if (ret)
+ return ret;
if (sta) {
if (pairwise) {
rcu_assign_pointer(sta->ptk[idx], new);
sta->ptk_idx = idx;
- ieee80211_check_fast_xmit(sta);
+ if (new) {
+ clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
+ ieee80211_check_fast_xmit(sta);
+ }
} else {
rcu_assign_pointer(sta->gtk[idx], new);
}
- ieee80211_check_fast_rx(sta);
+ if (new)
+ ieee80211_check_fast_rx(sta);
} else {
defunikey = old &&
old == key_mtx_dereference(sdata->local,
@@ -380,6 +457,8 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
if (old)
list_del_rcu(&old->list);
+
+ return 0;
}
struct ieee80211_key *
@@ -575,9 +654,6 @@ static void ieee80211_key_free_common(struct ieee80211_key *key)
static void __ieee80211_key_destroy(struct ieee80211_key *key,
bool delay_tailroom)
{
- if (key->local)
- ieee80211_key_disable_hw_accel(key);
-
if (key->local) {
struct ieee80211_sub_if_data *sdata = key->sdata;
@@ -654,7 +730,6 @@ int ieee80211_key_link(struct ieee80211_key *key,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
- struct ieee80211_local *local = sdata->local;
struct ieee80211_key *old_key;
int idx = key->conf.keyidx;
bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
@@ -691,17 +766,13 @@ int ieee80211_key_link(struct ieee80211_key *key,
increment_tailroom_need_count(sdata);
- ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
- ieee80211_key_destroy(old_key, delay_tailroom);
-
- ieee80211_debugfs_key_add(key);
+ ret = ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
- if (!local->wowlan) {
- ret = ieee80211_key_enable_hw_accel(key);
- if (ret)
- ieee80211_key_free(key, delay_tailroom);
+ if (!ret) {
+ ieee80211_debugfs_key_add(key);
+ ieee80211_key_destroy(old_key, delay_tailroom);
} else {
- ret = 0;
+ ieee80211_key_free(key, delay_tailroom);
}
out:
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 513627896204..83e71e6b2ebe 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -4,6 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -610,6 +611,18 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
local->ops = ops;
local->use_chanctx = use_chanctx;
+ /*
+ * We need a bit of data queued to build aggregates properly, so
+ * instruct the TCP stack to allow more than a single ms of data
+ * to be queued in the stack. The value is a bit-shift of 1
+ * second, so 8 is ~4ms of queued data. Only affects local TCP
+ * sockets.
+ * This is the default, anyhow - drivers may need to override it
+ * for local reasons (longer buffers, longer completion time, or
+ * similar).
+ */
+ local->hw.tx_sk_pacing_shift = 8;
+
/* set up some defaults */
local->hw.queues = 1;
local->hw.max_rates = 1;
@@ -684,6 +697,10 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending,
(unsigned long)local);
+ if (ops->wake_tx_queue)
+ tasklet_init(&local->wake_txqs_tasklet, ieee80211_wake_txqs,
+ (unsigned long)local);
+
tasklet_init(&local->tasklet,
ieee80211_tasklet_handler,
(unsigned long) local);
@@ -1154,6 +1171,53 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
goto fail_rate;
}
+ if (local->rate_ctrl) {
+ clear_bit(IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, hw->flags);
+ if (local->rate_ctrl->ops->capa & RATE_CTRL_CAPA_VHT_EXT_NSS_BW)
+ ieee80211_hw_set(hw, SUPPORTS_VHT_EXT_NSS_BW);
+ }
+
+ /*
+ * If the VHT capabilities don't have IEEE80211_VHT_EXT_NSS_BW_CAPABLE,
+ * or have it when we don't, copy the sband structure and set/clear it.
+ * This is necessary because rate scaling algorithms could be switched
+ * and have different support values.
+ * Print a message so that in the common case the reallocation can be
+ * avoided.
+ */
+ BUILD_BUG_ON(NUM_NL80211_BANDS > 8 * sizeof(local->sband_allocated));
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
+ struct ieee80211_supported_band *sband;
+ bool local_cap, ie_cap;
+
+ local_cap = ieee80211_hw_check(hw, SUPPORTS_VHT_EXT_NSS_BW);
+
+ sband = local->hw.wiphy->bands[band];
+ if (!sband || !sband->vht_cap.vht_supported)
+ continue;
+
+ ie_cap = !!(sband->vht_cap.vht_mcs.tx_highest &
+ cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE));
+
+ if (local_cap == ie_cap)
+ continue;
+
+ sband = kmemdup(sband, sizeof(*sband), GFP_KERNEL);
+ if (!sband) {
+ result = -ENOMEM;
+ goto fail_rate;
+ }
+
+ wiphy_dbg(hw->wiphy, "copying sband (band %d) due to VHT EXT NSS BW flag\n",
+ band);
+
+ sband->vht_cap.vht_mcs.tx_highest ^=
+ cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE);
+
+ local->hw.wiphy->bands[band] = sband;
+ local->sband_allocated |= BIT(band);
+ }
+
/* add one default STA interface if supported */
if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) &&
!ieee80211_hw_check(hw, NO_AUTO_VIF)) {
@@ -1272,6 +1336,7 @@ static int ieee80211_free_ack_frame(int id, void *p, void *data)
void ieee80211_free_hw(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
+ enum nl80211_band band;
mutex_destroy(&local->iflist_mtx);
mutex_destroy(&local->mtx);
@@ -1287,6 +1352,12 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
ieee80211_free_led_names(local);
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
+ if (!(local->sband_allocated & BIT(band)))
+ continue;
+ kfree(local->hw.wiphy->bands[band]);
+ }
+
wiphy_free(local->hw.wiphy);
}
EXPORT_SYMBOL(ieee80211_free_hw);
@@ -1304,18 +1375,12 @@ static int __init ieee80211_init(void)
if (ret)
return ret;
- ret = rc80211_minstrel_ht_init();
- if (ret)
- goto err_minstrel;
-
ret = ieee80211_iface_init();
if (ret)
goto err_netdev;
return 0;
err_netdev:
- rc80211_minstrel_ht_exit();
- err_minstrel:
rc80211_minstrel_exit();
return ret;
@@ -1323,7 +1388,6 @@ static int __init ieee80211_init(void)
static void __exit ieee80211_exit(void)
{
- rc80211_minstrel_ht_exit();
rc80211_minstrel_exit();
ieee80211s_stop();
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index d51da26e9c18..8bad414c52ad 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
+ * Copyright (C) 2018 Intel Corporation
* Authors: Luis Carlos Cobo <luisca@cozybit.com>
* Javier Cardona <javier@cozybit.com>
*
@@ -98,7 +99,9 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chandef.chan,
NL80211_CHAN_NO_HT);
ieee80211_chandef_ht_oper(ie->ht_operation, &sta_chan_def);
- ieee80211_chandef_vht_oper(ie->vht_operation, &sta_chan_def);
+ ieee80211_chandef_vht_oper(&sdata->local->hw,
+ ie->vht_operation, ie->ht_operation,
+ &sta_chan_def);
if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef,
&sta_chan_def))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3dbecae4be73..d2bc8d57c87e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -220,7 +220,8 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
memcpy(&he_oper_vht_cap, he_oper->optional, 3);
he_oper_vht_cap.basic_mcs_set = cpu_to_le16(0);
- if (!ieee80211_chandef_vht_oper(&he_oper_vht_cap,
+ if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
+ &he_oper_vht_cap, ht_oper,
&vht_chandef)) {
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
sdata_info(sdata,
@@ -228,7 +229,8 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
ret = IEEE80211_STA_DISABLE_HE;
goto out;
}
- } else if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) {
+ } else if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_oper,
+ ht_oper, &vht_chandef)) {
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
sdata_info(sdata,
"AP VHT information is invalid, disable VHT\n");
@@ -2759,13 +2761,40 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
auth_data->key_idx, tx_flags);
}
+/*
+ * Mark the pending authentication as done, start the
+ * IEEE80211_AUTH_WAIT_ASSOC timeout and move the station entry for @bssid
+ * to IEEE80211_STA_AUTH.
+ *
+ * Returns true on success. Returns false when no station entry exists for
+ * @bssid or the state transition fails.
+ *
+ * NOTE(review): both failure paths return with sdata->local->sta_mtx still
+ * held; only the success path unlocks it. A caller that gets false is
+ * therefore left owning the mutex. The ieee80211_rx_mgmt_auth() caller
+ * jumps to its out_err label on failure - presumably that label drops the
+ * mutex, confirm. The ieee80211_mgd_auth() caller ignores the return
+ * value, which looks like it would leave sta_mtx locked on failure -
+ * verify.
+ */
+static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata,
+ const u8 *bssid)
+{
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ struct sta_info *sta;
+
+ sdata_info(sdata, "authenticated\n");
+ ifmgd->auth_data->done = true;
+ ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC;
+ ifmgd->auth_data->timeout_started = true;
+ run_again(sdata, ifmgd->auth_data->timeout);
+
+ /* move station state to auth */
+ mutex_lock(&sdata->local->sta_mtx);
+ sta = sta_info_get(sdata, bssid);
+ if (!sta) {
+ WARN_ONCE(1, "%s: STA %pM not found", sdata->name, bssid);
+ /* returns with sta_mtx still locked */
+ return false;
+ }
+ if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) {
+ sdata_info(sdata, "failed moving %pM to auth\n", bssid);
+ /* returns with sta_mtx still locked */
+ return false;
+ }
+ mutex_unlock(&sdata->local->sta_mtx);
+
+ return true;
+}
+
static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt, size_t len)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 bssid[ETH_ALEN];
u16 auth_alg, auth_transaction, status_code;
- struct sta_info *sta;
struct ieee80211_event event = {
.type = MLME_EVENT,
.u.mlme.data = AUTH_EVENT,
@@ -2789,7 +2818,11 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
status_code = le16_to_cpu(mgmt->u.auth.status_code);
if (auth_alg != ifmgd->auth_data->algorithm ||
- auth_transaction != ifmgd->auth_data->expected_transaction) {
+ (auth_alg != WLAN_AUTH_SAE &&
+ auth_transaction != ifmgd->auth_data->expected_transaction) ||
+ (auth_alg == WLAN_AUTH_SAE &&
+ (auth_transaction < ifmgd->auth_data->expected_transaction ||
+ auth_transaction > 2))) {
sdata_info(sdata, "%pM unexpected authentication state: alg %d (expected %d) transact %d (expected %d)\n",
mgmt->sa, auth_alg, ifmgd->auth_data->algorithm,
auth_transaction,
@@ -2832,35 +2865,17 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
event.u.mlme.status = MLME_SUCCESS;
drv_event_callback(sdata->local, sdata, &event);
- sdata_info(sdata, "authenticated\n");
- ifmgd->auth_data->done = true;
- ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC;
- ifmgd->auth_data->timeout_started = true;
- run_again(sdata, ifmgd->auth_data->timeout);
-
- if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
- ifmgd->auth_data->expected_transaction != 2) {
- /*
- * Report auth frame to user space for processing since another
- * round of Authentication frames is still needed.
- */
- cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
- return;
+ if (ifmgd->auth_data->algorithm != WLAN_AUTH_SAE ||
+ (auth_transaction == 2 &&
+ ifmgd->auth_data->expected_transaction == 2)) {
+ if (!ieee80211_mark_sta_auth(sdata, bssid))
+ goto out_err;
+ } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
+ auth_transaction == 2) {
+ sdata_info(sdata, "SAE peer confirmed\n");
+ ifmgd->auth_data->peer_confirmed = true;
}
- /* move station state to auth */
- mutex_lock(&sdata->local->sta_mtx);
- sta = sta_info_get(sdata, bssid);
- if (!sta) {
- WARN_ONCE(1, "%s: STA %pM not found", sdata->name, bssid);
- goto out_err;
- }
- if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) {
- sdata_info(sdata, "failed moving %pM to auth\n", bssid);
- goto out_err;
- }
- mutex_unlock(&sdata->local->sta_mtx);
-
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
return;
out_err:
@@ -3237,19 +3252,16 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
}
if (bss_conf->he_support) {
- u32 he_oper_params =
- le32_to_cpu(elems.he_operation->he_oper_params);
+ bss_conf->bss_color =
+ le32_get_bits(elems.he_operation->he_oper_params,
+ IEEE80211_HE_OPERATION_BSS_COLOR_MASK);
- bss_conf->bss_color = he_oper_params &
- IEEE80211_HE_OPERATION_BSS_COLOR_MASK;
bss_conf->htc_trig_based_pkt_ext =
- (he_oper_params &
- IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK) <<
- IEEE80211_HE_OPERATION_DFLT_PE_DURATION_OFFSET;
+ le32_get_bits(elems.he_operation->he_oper_params,
+ IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK);
bss_conf->frame_time_rts_th =
- (he_oper_params &
- IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK) <<
- IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET;
+ le32_get_bits(elems.he_operation->he_oper_params,
+ IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK);
bss_conf->multi_sta_back_32bit =
sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
@@ -4879,6 +4891,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgd_auth_data *auth_data;
u16 auth_alg;
int err;
+ bool cont_auth;
/* prepare auth data structure */
@@ -4913,6 +4926,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
return -EOPNOTSUPP;
}
+ if (ifmgd->assoc_data)
+ return -EBUSY;
+
auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len +
req->ie_len, GFP_KERNEL);
if (!auth_data)
@@ -4932,6 +4948,13 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
auth_data->data_len += req->auth_data_len - 4;
}
+ /* Check if continuing authentication or trying to authenticate with the
+ * same BSS that we were in the process of authenticating with and avoid
+ * removal and re-addition of the STA entry in
+ * ieee80211_prep_connection().
+ */
+ cont_auth = ifmgd->auth_data && req->bss == ifmgd->auth_data->bss;
+
if (req->ie && req->ie_len) {
memcpy(&auth_data->data[auth_data->data_len],
req->ie, req->ie_len);
@@ -4948,18 +4971,26 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
/* try to authenticate/probe */
- if ((ifmgd->auth_data && !ifmgd->auth_data->done) ||
- ifmgd->assoc_data) {
- err = -EBUSY;
- goto err_free;
+ if (ifmgd->auth_data) {
+ if (cont_auth && req->auth_type == NL80211_AUTHTYPE_SAE) {
+ auth_data->peer_confirmed =
+ ifmgd->auth_data->peer_confirmed;
+ }
+ ieee80211_destroy_auth_data(sdata, cont_auth);
}
- if (ifmgd->auth_data)
- ieee80211_destroy_auth_data(sdata, false);
-
/* prep auth_data so we don't go into idle on disassoc */
ifmgd->auth_data = auth_data;
+ /* If this is continuation of an ongoing SAE authentication exchange
+ * (i.e., request to send SAE Confirm) and the peer has already
+ * confirmed, mark authentication completed since we are about to send
+ * out SAE Confirm.
+ */
+ if (cont_auth && req->auth_type == NL80211_AUTHTYPE_SAE &&
+ auth_data->peer_confirmed && auth_data->sae_trans == 2)
+ ieee80211_mark_sta_auth(sdata, req->bss->bssid);
+
if (ifmgd->associated) {
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
@@ -4977,7 +5008,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid);
- err = ieee80211_prep_connection(sdata, req->bss, false, false);
+ err = ieee80211_prep_connection(sdata, req->bss, cont_auth, false);
if (err)
goto err_clear;
@@ -4998,7 +5029,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
mutex_lock(&sdata->local->mtx);
ieee80211_vif_release_channel(sdata);
mutex_unlock(&sdata->local->mtx);
- err_free:
kfree(auth_data);
return err;
}
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 8212bfeb71d6..d59198191a79 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -95,18 +95,5 @@ static inline void rc80211_minstrel_exit(void)
}
#endif
-#ifdef CONFIG_MAC80211_RC_MINSTREL_HT
-int rc80211_minstrel_ht_init(void);
-void rc80211_minstrel_ht_exit(void);
-#else
-static inline int rc80211_minstrel_ht_init(void)
-{
- return 0;
-}
-static inline void rc80211_minstrel_ht_exit(void)
-{
-}
-#endif
-
#endif /* IEEE80211_RATE_H */
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 07fb219327d6..a34e9c2ca626 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -167,12 +167,6 @@ minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs)
if (unlikely(!mrs->att_hist)) {
mrs->prob_ewma = cur_prob;
} else {
- /* update exponential weighted moving variance */
- mrs->prob_ewmv = minstrel_ewmv(mrs->prob_ewmv,
- cur_prob,
- mrs->prob_ewma,
- EWMA_LEVEL);
-
/*update exponential weighted moving avarage */
mrs->prob_ewma = minstrel_ewma(mrs->prob_ewma,
cur_prob,
@@ -572,141 +566,6 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
minstrel_update_rates(mp, mi);
}
-static void *
-minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
-{
- struct ieee80211_supported_band *sband;
- struct minstrel_sta_info *mi;
- struct minstrel_priv *mp = priv;
- struct ieee80211_hw *hw = mp->hw;
- int max_rates = 0;
- int i;
-
- mi = kzalloc(sizeof(struct minstrel_sta_info), gfp);
- if (!mi)
- return NULL;
-
- for (i = 0; i < NUM_NL80211_BANDS; i++) {
- sband = hw->wiphy->bands[i];
- if (sband && sband->n_bitrates > max_rates)
- max_rates = sband->n_bitrates;
- }
-
- mi->r = kcalloc(max_rates, sizeof(struct minstrel_rate), gfp);
- if (!mi->r)
- goto error;
-
- mi->sample_table = kmalloc_array(max_rates, SAMPLE_COLUMNS, gfp);
- if (!mi->sample_table)
- goto error1;
-
- mi->last_stats_update = jiffies;
- return mi;
-
-error1:
- kfree(mi->r);
-error:
- kfree(mi);
- return NULL;
-}
-
-static void
-minstrel_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta)
-{
- struct minstrel_sta_info *mi = priv_sta;
-
- kfree(mi->sample_table);
- kfree(mi->r);
- kfree(mi);
-}
-
-static void
-minstrel_init_cck_rates(struct minstrel_priv *mp)
-{
- static const int bitrates[4] = { 10, 20, 55, 110 };
- struct ieee80211_supported_band *sband;
- u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
- int i, j;
-
- sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ];
- if (!sband)
- return;
-
- for (i = 0, j = 0; i < sband->n_bitrates; i++) {
- struct ieee80211_rate *rate = &sband->bitrates[i];
-
- if (rate->flags & IEEE80211_RATE_ERP_G)
- continue;
-
- if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
- continue;
-
- for (j = 0; j < ARRAY_SIZE(bitrates); j++) {
- if (rate->bitrate != bitrates[j])
- continue;
-
- mp->cck_rates[j] = i;
- break;
- }
- }
-}
-
-static void *
-minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
-{
- struct minstrel_priv *mp;
-
- mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC);
- if (!mp)
- return NULL;
-
- /* contention window settings
- * Just an approximation. Using the per-queue values would complicate
- * the calculations and is probably unnecessary */
- mp->cw_min = 15;
- mp->cw_max = 1023;
-
- /* number of packets (in %) to use for sampling other rates
- * sample less often for non-mrr packets, because the overhead
- * is much higher than with mrr */
- mp->lookaround_rate = 5;
- mp->lookaround_rate_mrr = 10;
-
- /* maximum time that the hw is allowed to stay in one MRR segment */
- mp->segment_size = 6000;
-
- if (hw->max_rate_tries > 0)
- mp->max_retry = hw->max_rate_tries;
- else
- /* safe default, does not necessarily have to match hw properties */
- mp->max_retry = 7;
-
- if (hw->max_rates >= 4)
- mp->has_mrr = true;
-
- mp->hw = hw;
- mp->update_interval = 100;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- mp->fixed_rate_idx = (u32) -1;
- mp->dbg_fixed_rate = debugfs_create_u32("fixed_rate_idx",
- 0666, debugfsdir, &mp->fixed_rate_idx);
-#endif
-
- minstrel_init_cck_rates(mp);
-
- return mp;
-}
-
-static void
-minstrel_free(void *priv)
-{
-#ifdef CONFIG_MAC80211_DEBUGFS
- debugfs_remove(((struct minstrel_priv *)priv)->dbg_fixed_rate);
-#endif
- kfree(priv);
-}
-
static u32 minstrel_get_expected_throughput(void *priv_sta)
{
struct minstrel_sta_info *mi = priv_sta;
@@ -725,29 +584,8 @@ static u32 minstrel_get_expected_throughput(void *priv_sta)
}
const struct rate_control_ops mac80211_minstrel = {
- .name = "minstrel",
.tx_status_ext = minstrel_tx_status,
.get_rate = minstrel_get_rate,
.rate_init = minstrel_rate_init,
- .alloc = minstrel_alloc,
- .free = minstrel_free,
- .alloc_sta = minstrel_alloc_sta,
- .free_sta = minstrel_free_sta,
-#ifdef CONFIG_MAC80211_DEBUGFS
- .add_sta_debugfs = minstrel_add_sta_debugfs,
- .remove_sta_debugfs = minstrel_remove_sta_debugfs,
-#endif
.get_expected_throughput = minstrel_get_expected_throughput,
};
-
-int __init
-rc80211_minstrel_init(void)
-{
- return ieee80211_rate_control_register(&mac80211_minstrel);
-}
-
-void
-rc80211_minstrel_exit(void)
-{
- ieee80211_rate_control_unregister(&mac80211_minstrel);
-}
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index be6c3f35f48b..23ec953e3a24 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -35,19 +35,6 @@ minstrel_ewma(int old, int new, int weight)
return old + incr;
}
-/*
- * Perform EWMV (Exponentially Weighted Moving Variance) calculation
- */
-static inline int
-minstrel_ewmv(int old_ewmv, int cur_prob, int prob_ewma, int weight)
-{
- int diff, incr;
-
- diff = cur_prob - prob_ewma;
- incr = (EWMA_DIV - weight) * diff / EWMA_DIV;
- return weight * (old_ewmv + MINSTREL_TRUNC(diff * incr)) / EWMA_DIV;
-}
-
struct minstrel_rate_stats {
/* current / last sampling period attempts/success counters */
u16 attempts, last_attempts;
@@ -56,11 +43,8 @@ struct minstrel_rate_stats {
/* total attempts/success counters */
u32 att_hist, succ_hist;
- /* statistis of packet delivery probability
- * prob_ewma - exponential weighted moving average of prob
- * prob_ewmsd - exp. weighted moving standard deviation of prob */
+ /* prob_ewma - exponential weighted moving average of prob */
u16 prob_ewma;
- u16 prob_ewmv;
/* maximum retry counts */
u8 retry_count;
@@ -109,11 +93,6 @@ struct minstrel_sta_info {
/* sampling table */
u8 *sample_table;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- struct dentry *dbg_stats;
- struct dentry *dbg_stats_csv;
-#endif
};
struct minstrel_priv {
@@ -137,7 +116,6 @@ struct minstrel_priv {
* - setting will be applied on next update
*/
u32 fixed_rate_idx;
- struct dentry *dbg_fixed_rate;
#endif
};
@@ -146,17 +124,8 @@ struct minstrel_debugfs_info {
char buf[];
};
-/* Get EWMSD (Exponentially Weighted Moving Standard Deviation) * 10 */
-static inline int
-minstrel_get_ewmsd10(struct minstrel_rate_stats *mrs)
-{
- unsigned int ewmv = mrs->prob_ewmv;
- return int_sqrt(MINSTREL_TRUNC(ewmv * 1000 * 1000));
-}
-
extern const struct rate_control_ops mac80211_minstrel;
void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
-void minstrel_remove_sta_debugfs(void *priv, void *priv_sta);
/* Recalculate success probabilities and counters for a given rate using EWMA */
void minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs);
@@ -165,7 +134,5 @@ int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma);
/* debugfs */
int minstrel_stats_open(struct inode *inode, struct file *file);
int minstrel_stats_csv_open(struct inode *inode, struct file *file);
-ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos);
-int minstrel_stats_release(struct inode *inode, struct file *file);
#endif
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 9ad7d63d3e5b..c8afd85b51a0 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -54,22 +54,6 @@
#include <net/mac80211.h>
#include "rc80211_minstrel.h"
-ssize_t
-minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos)
-{
- struct minstrel_debugfs_info *ms;
-
- ms = file->private_data;
- return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len);
-}
-
-int
-minstrel_stats_release(struct inode *inode, struct file *file)
-{
- kfree(file->private_data);
- return 0;
-}
-
int
minstrel_stats_open(struct inode *inode, struct file *file)
{
@@ -86,14 +70,13 @@ minstrel_stats_open(struct inode *inode, struct file *file)
p = ms->buf;
p += sprintf(p, "\n");
p += sprintf(p,
- "best __________rate_________ ________statistics________ ____last_____ ______sum-of________\n");
+ "best __________rate_________ ____statistics___ ____last_____ ______sum-of________\n");
p += sprintf(p,
- "rate [name idx airtime max_tp] [avg(tp) avg(prob) sd(prob)] [retry|suc|att] [#success | #attempts]\n");
+ "rate [name idx airtime max_tp] [avg(tp) avg(prob)] [retry|suc|att] [#success | #attempts]\n");
for (i = 0; i < mi->n_rates; i++) {
struct minstrel_rate *mr = &mi->r[i];
struct minstrel_rate_stats *mrs = &mi->r[i].stats;
- unsigned int prob_ewmsd;
*(p++) = (i == mi->max_tp_rate[0]) ? 'A' : ' ';
*(p++) = (i == mi->max_tp_rate[1]) ? 'B' : ' ';
@@ -109,15 +92,13 @@ minstrel_stats_open(struct inode *inode, struct file *file)
tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
- prob_ewmsd = minstrel_get_ewmsd10(mrs);
- p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u"
+ p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u"
" %3u %3u %-3u "
"%9llu %-9llu\n",
tp_max / 10, tp_max % 10,
tp_avg / 10, tp_avg % 10,
eprob / 10, eprob % 10,
- prob_ewmsd / 10, prob_ewmsd % 10,
mrs->retry_count,
mrs->last_success,
mrs->last_attempts,
@@ -135,14 +116,6 @@ minstrel_stats_open(struct inode *inode, struct file *file)
return 0;
}
-static const struct file_operations minstrel_stat_fops = {
- .owner = THIS_MODULE,
- .open = minstrel_stats_open,
- .read = minstrel_stats_read,
- .release = minstrel_stats_release,
- .llseek = default_llseek,
-};
-
int
minstrel_stats_csv_open(struct inode *inode, struct file *file)
{
@@ -161,7 +134,6 @@ minstrel_stats_csv_open(struct inode *inode, struct file *file)
for (i = 0; i < mi->n_rates; i++) {
struct minstrel_rate *mr = &mi->r[i];
struct minstrel_rate_stats *mrs = &mi->r[i].stats;
- unsigned int prob_ewmsd;
p += sprintf(p, "%s" ,((i == mi->max_tp_rate[0]) ? "A" : ""));
p += sprintf(p, "%s" ,((i == mi->max_tp_rate[1]) ? "B" : ""));
@@ -177,14 +149,12 @@ minstrel_stats_csv_open(struct inode *inode, struct file *file)
tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma);
eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
- prob_ewmsd = minstrel_get_ewmsd10(mrs);
- p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u.%u,%u,%u,%u,"
+ p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,%u,"
"%llu,%llu,%d,%d\n",
tp_max / 10, tp_max % 10,
tp_avg / 10, tp_avg % 10,
eprob / 10, eprob % 10,
- prob_ewmsd / 10, prob_ewmsd % 10,
mrs->retry_count,
mrs->last_success,
mrs->last_attempts,
@@ -200,33 +170,3 @@ minstrel_stats_csv_open(struct inode *inode, struct file *file)
return 0;
}
-
-static const struct file_operations minstrel_stat_csv_fops = {
- .owner = THIS_MODULE,
- .open = minstrel_stats_csv_open,
- .read = minstrel_stats_read,
- .release = minstrel_stats_release,
- .llseek = default_llseek,
-};
-
-void
-minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
-{
- struct minstrel_sta_info *mi = priv_sta;
-
- mi->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, mi,
- &minstrel_stat_fops);
-
- mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, mi,
- &minstrel_stat_csv_fops);
-}
-
-void
-minstrel_remove_sta_debugfs(void *priv, void *priv_sta)
-{
- struct minstrel_sta_info *mi = priv_sta;
-
- debugfs_remove(mi->dbg_stats);
-
- debugfs_remove(mi->dbg_stats_csv);
-}
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 67ebdeaffbbc..f466ec37d161 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -52,22 +52,23 @@
_streams - 1
/* MCS rate information for an MCS group */
-#define MCS_GROUP(_streams, _sgi, _ht40) \
+#define MCS_GROUP(_streams, _sgi, _ht40, _s) \
[GROUP_IDX(_streams, _sgi, _ht40)] = { \
.streams = _streams, \
+ .shift = _s, \
.flags = \
IEEE80211_TX_RC_MCS | \
(_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \
(_ht40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0), \
.duration = { \
- MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26), \
- MCS_DURATION(_streams, _sgi, _ht40 ? 108 : 52), \
- MCS_DURATION(_streams, _sgi, _ht40 ? 162 : 78), \
- MCS_DURATION(_streams, _sgi, _ht40 ? 216 : 104), \
- MCS_DURATION(_streams, _sgi, _ht40 ? 324 : 156), \
- MCS_DURATION(_streams, _sgi, _ht40 ? 432 : 208), \
- MCS_DURATION(_streams, _sgi, _ht40 ? 486 : 234), \
- MCS_DURATION(_streams, _sgi, _ht40 ? 540 : 260) \
+ MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26) >> _s, \
+ MCS_DURATION(_streams, _sgi, _ht40 ? 108 : 52) >> _s, \
+ MCS_DURATION(_streams, _sgi, _ht40 ? 162 : 78) >> _s, \
+ MCS_DURATION(_streams, _sgi, _ht40 ? 216 : 104) >> _s, \
+ MCS_DURATION(_streams, _sgi, _ht40 ? 324 : 156) >> _s, \
+ MCS_DURATION(_streams, _sgi, _ht40 ? 432 : 208) >> _s, \
+ MCS_DURATION(_streams, _sgi, _ht40 ? 486 : 234) >> _s, \
+ MCS_DURATION(_streams, _sgi, _ht40 ? 540 : 260) >> _s \
} \
}
@@ -80,9 +81,10 @@
#define BW2VBPS(_bw, r3, r2, r1) \
(_bw == BW_80 ? r3 : _bw == BW_40 ? r2 : r1)
-#define VHT_GROUP(_streams, _sgi, _bw) \
+#define VHT_GROUP(_streams, _sgi, _bw, _s) \
[VHT_GROUP_IDX(_streams, _sgi, _bw)] = { \
.streams = _streams, \
+ .shift = _s, \
.flags = \
IEEE80211_TX_RC_VHT_MCS | \
(_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \
@@ -90,25 +92,25 @@
_bw == BW_40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0), \
.duration = { \
MCS_DURATION(_streams, _sgi, \
- BW2VBPS(_bw, 117, 54, 26)), \
+ BW2VBPS(_bw, 117, 54, 26)) >> _s, \
MCS_DURATION(_streams, _sgi, \
- BW2VBPS(_bw, 234, 108, 52)), \
+ BW2VBPS(_bw, 234, 108, 52)) >> _s, \
MCS_DURATION(_streams, _sgi, \
- BW2VBPS(_bw, 351, 162, 78)), \
+ BW2VBPS(_bw, 351, 162, 78)) >> _s, \
MCS_DURATION(_streams, _sgi, \
- BW2VBPS(_bw, 468, 216, 104)), \
+ BW2VBPS(_bw, 468, 216, 104)) >> _s, \
MCS_DURATION(_streams, _sgi, \
- BW2VBPS(_bw, 702, 324, 156)), \
+ BW2VBPS(_bw, 702, 324, 156)) >> _s, \
MCS_DURATION(_streams, _sgi, \
- BW2VBPS(_bw, 936, 432, 208)), \
+ BW2VBPS(_bw, 936, 432, 208)) >> _s, \
MCS_DURATION(_streams, _sgi, \
- BW2VBPS(_bw, 1053, 486, 234)), \
+ BW2VBPS(_bw, 1053, 486, 234)) >> _s, \
MCS_DURATION(_streams, _sgi, \
- BW2VBPS(_bw, 1170, 540, 260)), \
+ BW2VBPS(_bw, 1170, 540, 260)) >> _s, \
MCS_DURATION(_streams, _sgi, \
- BW2VBPS(_bw, 1404, 648, 312)), \
+ BW2VBPS(_bw, 1404, 648, 312)) >> _s, \
MCS_DURATION(_streams, _sgi, \
- BW2VBPS(_bw, 1560, 720, 346)) \
+ BW2VBPS(_bw, 1560, 720, 346)) >> _s \
} \
}
@@ -121,28 +123,27 @@
(CCK_DURATION((_bitrate > 10 ? 20 : 10), false, 60) + \
CCK_DURATION(_bitrate, _short, AVG_PKT_SIZE))
-#define CCK_DURATION_LIST(_short) \
- CCK_ACK_DURATION(10, _short), \
- CCK_ACK_DURATION(20, _short), \
- CCK_ACK_DURATION(55, _short), \
- CCK_ACK_DURATION(110, _short)
+#define CCK_DURATION_LIST(_short, _s) \
+ CCK_ACK_DURATION(10, _short) >> _s, \
+ CCK_ACK_DURATION(20, _short) >> _s, \
+ CCK_ACK_DURATION(55, _short) >> _s, \
+ CCK_ACK_DURATION(110, _short) >> _s
-#define CCK_GROUP \
+#define CCK_GROUP(_s) \
[MINSTREL_CCK_GROUP] = { \
- .streams = 0, \
+ .streams = 1, \
.flags = 0, \
+ .shift = _s, \
.duration = { \
- CCK_DURATION_LIST(false), \
- CCK_DURATION_LIST(true) \
+ CCK_DURATION_LIST(false, _s), \
+ CCK_DURATION_LIST(true, _s) \
} \
}
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
static bool minstrel_vht_only = true;
module_param(minstrel_vht_only, bool, 0644);
MODULE_PARM_DESC(minstrel_vht_only,
"Use only VHT rates when VHT is supported by sta.");
-#endif
/*
* To enable sufficiently targeted rate sampling, MCS rates are divided into
@@ -153,49 +154,47 @@ MODULE_PARM_DESC(minstrel_vht_only,
* BW -> SGI -> #streams
*/
const struct mcs_group minstrel_mcs_groups[] = {
- MCS_GROUP(1, 0, BW_20),
- MCS_GROUP(2, 0, BW_20),
- MCS_GROUP(3, 0, BW_20),
+ MCS_GROUP(1, 0, BW_20, 5),
+ MCS_GROUP(2, 0, BW_20, 4),
+ MCS_GROUP(3, 0, BW_20, 4),
- MCS_GROUP(1, 1, BW_20),
- MCS_GROUP(2, 1, BW_20),
- MCS_GROUP(3, 1, BW_20),
+ MCS_GROUP(1, 1, BW_20, 5),
+ MCS_GROUP(2, 1, BW_20, 4),
+ MCS_GROUP(3, 1, BW_20, 4),
- MCS_GROUP(1, 0, BW_40),
- MCS_GROUP(2, 0, BW_40),
- MCS_GROUP(3, 0, BW_40),
+ MCS_GROUP(1, 0, BW_40, 4),
+ MCS_GROUP(2, 0, BW_40, 4),
+ MCS_GROUP(3, 0, BW_40, 4),
- MCS_GROUP(1, 1, BW_40),
- MCS_GROUP(2, 1, BW_40),
- MCS_GROUP(3, 1, BW_40),
+ MCS_GROUP(1, 1, BW_40, 4),
+ MCS_GROUP(2, 1, BW_40, 4),
+ MCS_GROUP(3, 1, BW_40, 4),
- CCK_GROUP,
+ CCK_GROUP(8),
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
- VHT_GROUP(1, 0, BW_20),
- VHT_GROUP(2, 0, BW_20),
- VHT_GROUP(3, 0, BW_20),
+ VHT_GROUP(1, 0, BW_20, 5),
+ VHT_GROUP(2, 0, BW_20, 4),
+ VHT_GROUP(3, 0, BW_20, 4),
- VHT_GROUP(1, 1, BW_20),
- VHT_GROUP(2, 1, BW_20),
- VHT_GROUP(3, 1, BW_20),
+ VHT_GROUP(1, 1, BW_20, 5),
+ VHT_GROUP(2, 1, BW_20, 4),
+ VHT_GROUP(3, 1, BW_20, 4),
- VHT_GROUP(1, 0, BW_40),
- VHT_GROUP(2, 0, BW_40),
- VHT_GROUP(3, 0, BW_40),
+ VHT_GROUP(1, 0, BW_40, 4),
+ VHT_GROUP(2, 0, BW_40, 4),
+ VHT_GROUP(3, 0, BW_40, 4),
- VHT_GROUP(1, 1, BW_40),
- VHT_GROUP(2, 1, BW_40),
- VHT_GROUP(3, 1, BW_40),
+ VHT_GROUP(1, 1, BW_40, 4),
+ VHT_GROUP(2, 1, BW_40, 4),
+ VHT_GROUP(3, 1, BW_40, 4),
- VHT_GROUP(1, 0, BW_80),
- VHT_GROUP(2, 0, BW_80),
- VHT_GROUP(3, 0, BW_80),
+ VHT_GROUP(1, 0, BW_80, 4),
+ VHT_GROUP(2, 0, BW_80, 4),
+ VHT_GROUP(3, 0, BW_80, 4),
- VHT_GROUP(1, 1, BW_80),
- VHT_GROUP(2, 1, BW_80),
- VHT_GROUP(3, 1, BW_80),
-#endif
+ VHT_GROUP(1, 1, BW_80, 4),
+ VHT_GROUP(2, 1, BW_80, 4),
+ VHT_GROUP(3, 1, BW_80, 4),
};
static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES] __read_mostly;
@@ -282,7 +281,8 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
break;
/* short preamble */
- if (!(mi->supported[group] & BIT(idx)))
+ if ((mi->supported[group] & BIT(idx + 4)) &&
+ (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE))
idx += 4;
}
return &mi->groups[group].rates[idx];
@@ -311,7 +311,8 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
if (group != MINSTREL_CCK_GROUP)
nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
- nsecs += minstrel_mcs_groups[group].duration[rate];
+ nsecs += minstrel_mcs_groups[group].duration[rate] <<
+ minstrel_mcs_groups[group].shift;
/*
* For the throughput calculation, limit the probability value to 90% to
@@ -759,12 +760,19 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
minstrel_ht_update_rates(mp, mi);
}
+static inline int
+minstrel_get_duration(int index)
+{
+ const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
+ unsigned int duration = group->duration[index % MCS_GROUP_RATES];
+ return duration << group->shift;
+}
+
static void
minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
int index)
{
struct minstrel_rate_stats *mrs;
- const struct mcs_group *group;
unsigned int tx_time, tx_time_rtscts, tx_time_data;
unsigned int cw = mp->cw_min;
unsigned int ctime = 0;
@@ -783,8 +791,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
mrs->retry_count_rtscts = 2;
mrs->retry_updated = true;
- group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
- tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len / 1000;
+ tx_time_data = minstrel_get_duration(index) * ampdu_len / 1000;
/* Contention time for first 2 tries */
ctime = (t_slot * cw) >> 1;
@@ -878,20 +885,24 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
int group = mi->max_prob_rate / MCS_GROUP_RATES;
const struct mcs_group *g = &minstrel_mcs_groups[group];
int rate = mi->max_prob_rate % MCS_GROUP_RATES;
+ unsigned int duration;
/* Disable A-MSDU if max_prob_rate is bad */
if (mi->groups[group].rates[rate].prob_ewma < MINSTREL_FRAC(50, 100))
return 1;
+ duration = g->duration[rate];
+ duration <<= g->shift;
+
/* If the rate is slower than single-stream MCS1, make A-MSDU limit small */
- if (g->duration[rate] > MCS_DURATION(1, 0, 52))
+ if (duration > MCS_DURATION(1, 0, 52))
return 500;
/*
* If the rate is slower than single-stream MCS4, limit A-MSDU to usual
* data packet size
*/
- if (g->duration[rate] > MCS_DURATION(1, 0, 104))
+ if (duration > MCS_DURATION(1, 0, 104))
return 1600;
/*
@@ -899,7 +910,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
* rate success probability is less than 75%, limit A-MSDU to twice the usual
* data packet size
*/
- if (g->duration[rate] > MCS_DURATION(1, 0, 260) ||
+ if (duration > MCS_DURATION(1, 0, 260) ||
(minstrel_ht_get_prob_ewma(mi, mi->max_tp_rate[0]) <
MINSTREL_FRAC(75, 100)))
return 3200;
@@ -946,13 +957,6 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
rate_control_set_rates(mp->hw, mi->sta, rates);
}
-static inline int
-minstrel_get_duration(int index)
-{
- const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
- return group->duration[index % MCS_GROUP_RATES];
-}
-
static int
minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
@@ -1000,10 +1004,13 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
return -1;
/*
- * Do not sample if the probability is already higher than 95%
- * to avoid wasting airtime.
+ * Do not sample if the probability is already higher than 95%,
+ * or if the rate is 3 times slower than the current max probability
+ * rate, to avoid wasting airtime.
*/
- if (mrs->prob_ewma > MINSTREL_FRAC(95, 100))
+ sample_dur = minstrel_get_duration(sample_idx);
+ if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) ||
+ minstrel_get_duration(mi->max_prob_rate) * 3 < sample_dur)
return -1;
/*
@@ -1013,7 +1020,6 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
cur_max_tp_streams = minstrel_mcs_groups[tp_rate1 /
MCS_GROUP_RATES].streams;
- sample_dur = minstrel_get_duration(sample_idx);
if (sample_dur >= minstrel_get_duration(tp_rate2) &&
(cur_max_tp_streams - 1 <
minstrel_mcs_groups[sample_group].streams ||
@@ -1077,18 +1083,23 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
return;
sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES];
+ sample_idx %= MCS_GROUP_RATES;
+
+ if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP] &&
+ (sample_idx >= 4) != txrc->short_preamble)
+ return;
+
info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
rate->count = 1;
- if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) {
+ if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP]) {
int idx = sample_idx % ARRAY_SIZE(mp->cck_rates);
rate->idx = mp->cck_rates[idx];
} else if (sample_group->flags & IEEE80211_TX_RC_VHT_MCS) {
ieee80211_rate_set_vht(rate, sample_idx % MCS_GROUP_RATES,
sample_group->streams);
} else {
- rate->idx = sample_idx % MCS_GROUP_RATES +
- (sample_group->streams - 1) * 8;
+ rate->idx = sample_idx + (sample_group->streams - 1) * 8;
}
rate->flags = sample_group->flags;
@@ -1130,14 +1141,14 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
struct minstrel_ht_sta_priv *msp = priv_sta;
struct minstrel_ht_sta *mi = &msp->ht;
struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs;
- u16 sta_cap = sta->ht_cap.cap;
+ u16 ht_cap = sta->ht_cap.cap;
struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap;
- struct sta_info *sinfo = container_of(sta, struct sta_info, sta);
int use_vht;
int n_supported = 0;
int ack_dur;
int stbc;
int i;
+ bool ldpc;
/* fall back to the old minstrel for legacy stations */
if (!sta->ht_cap.ht_supported)
@@ -1145,12 +1156,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != MINSTREL_GROUPS_NB);
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
if (vht_cap->vht_supported)
use_vht = vht_cap->vht_mcs.tx_mcs_map != cpu_to_le16(~0);
else
-#endif
- use_vht = 0;
+ use_vht = 0;
msp->is_ht = true;
memset(mi, 0, sizeof(*mi));
@@ -1175,16 +1184,22 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
}
mi->sample_tries = 4;
- /* TODO tx_flags for vht - ATM the RC API is not fine-grained enough */
if (!use_vht) {
- stbc = (sta_cap & IEEE80211_HT_CAP_RX_STBC) >>
+ stbc = (ht_cap & IEEE80211_HT_CAP_RX_STBC) >>
IEEE80211_HT_CAP_RX_STBC_SHIFT;
- mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT;
- if (sta_cap & IEEE80211_HT_CAP_LDPC_CODING)
- mi->tx_flags |= IEEE80211_TX_CTL_LDPC;
+ ldpc = ht_cap & IEEE80211_HT_CAP_LDPC_CODING;
+ } else {
+ stbc = (vht_cap->cap & IEEE80211_VHT_CAP_RXSTBC_MASK) >>
+ IEEE80211_VHT_CAP_RXSTBC_SHIFT;
+
+ ldpc = vht_cap->cap & IEEE80211_VHT_CAP_RXLDPC;
}
+ mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT;
+ if (ldpc)
+ mi->tx_flags |= IEEE80211_TX_CTL_LDPC;
+
for (i = 0; i < ARRAY_SIZE(mi->groups); i++) {
u32 gflags = minstrel_mcs_groups[i].flags;
int bw, nss;
@@ -1197,10 +1212,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
if (gflags & IEEE80211_TX_RC_SHORT_GI) {
if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) {
- if (!(sta_cap & IEEE80211_HT_CAP_SGI_40))
+ if (!(ht_cap & IEEE80211_HT_CAP_SGI_40))
continue;
} else {
- if (!(sta_cap & IEEE80211_HT_CAP_SGI_20))
+ if (!(ht_cap & IEEE80211_HT_CAP_SGI_20))
continue;
}
}
@@ -1217,10 +1232,9 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
/* HT rate */
if (gflags & IEEE80211_TX_RC_MCS) {
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
if (use_vht && minstrel_vht_only)
continue;
-#endif
+
mi->supported[i] = mcs->rx_mask[nss - 1];
if (mi->supported[i])
n_supported++;
@@ -1258,8 +1272,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
if (!n_supported)
goto use_legacy;
- if (test_sta_flag(sinfo, WLAN_STA_SHORT_PREAMBLE))
- mi->cck_supported_short |= mi->cck_supported_short << 4;
+ mi->supported[MINSTREL_CCK_GROUP] |= mi->cck_supported_short << 4;
/* create an initial rate table with the lowest supported rates */
minstrel_ht_update_stats(mp, mi);
@@ -1340,16 +1353,88 @@ minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta)
kfree(msp);
}
+static void
+minstrel_ht_init_cck_rates(struct minstrel_priv *mp)
+{
+ static const int bitrates[4] = { 10, 20, 55, 110 };
+ struct ieee80211_supported_band *sband;
+ u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
+ int i, j;
+
+ sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ];
+ if (!sband)
+ return;
+
+ for (i = 0; i < sband->n_bitrates; i++) {
+ struct ieee80211_rate *rate = &sband->bitrates[i];
+
+ if (rate->flags & IEEE80211_RATE_ERP_G)
+ continue;
+
+ if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
+ continue;
+
+ for (j = 0; j < ARRAY_SIZE(bitrates); j++) {
+ if (rate->bitrate != bitrates[j])
+ continue;
+
+ mp->cck_rates[j] = i;
+ break;
+ }
+ }
+}
+
static void *
minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
{
- return mac80211_minstrel.alloc(hw, debugfsdir);
+ struct minstrel_priv *mp;
+
+ mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC);
+ if (!mp)
+ return NULL;
+
+ /* contention window settings
+ * Just an approximation. Using the per-queue values would complicate
+ * the calculations and is probably unnecessary */
+ mp->cw_min = 15;
+ mp->cw_max = 1023;
+
+ /* number of packets (in %) to use for sampling other rates
+ * sample less often for non-mrr packets, because the overhead
+ * is much higher than with mrr */
+ mp->lookaround_rate = 5;
+ mp->lookaround_rate_mrr = 10;
+
+ /* maximum time that the hw is allowed to stay in one MRR segment */
+ mp->segment_size = 6000;
+
+ if (hw->max_rate_tries > 0)
+ mp->max_retry = hw->max_rate_tries;
+ else
+ /* safe default, does not necessarily have to match hw properties */
+ mp->max_retry = 7;
+
+ if (hw->max_rates >= 4)
+ mp->has_mrr = true;
+
+ mp->hw = hw;
+ mp->update_interval = 100;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ mp->fixed_rate_idx = (u32) -1;
+ debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir,
+ &mp->fixed_rate_idx);
+#endif
+
+ minstrel_ht_init_cck_rates(mp);
+
+ return mp;
}
static void
minstrel_ht_free(void *priv)
{
- mac80211_minstrel.free(priv);
+ kfree(priv);
}
static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
@@ -1384,7 +1469,6 @@ static const struct rate_control_ops mac80211_minstrel_ht = {
.free = minstrel_ht_free,
#ifdef CONFIG_MAC80211_DEBUGFS
.add_sta_debugfs = minstrel_ht_add_sta_debugfs,
- .remove_sta_debugfs = minstrel_ht_remove_sta_debugfs,
#endif
.get_expected_throughput = minstrel_ht_get_expected_throughput,
};
@@ -1409,14 +1493,14 @@ static void __init init_sample_table(void)
}
int __init
-rc80211_minstrel_ht_init(void)
+rc80211_minstrel_init(void)
{
init_sample_table();
return ieee80211_rate_control_register(&mac80211_minstrel_ht);
}
void
-rc80211_minstrel_ht_exit(void)
+rc80211_minstrel_exit(void)
{
ieee80211_rate_control_unregister(&mac80211_minstrel_ht);
}
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index de1646c42e82..26b7a3244b47 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -15,11 +15,7 @@
*/
#define MINSTREL_MAX_STREAMS 3
#define MINSTREL_HT_STREAM_GROUPS 4 /* BW(=2) * SGI(=2) */
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
#define MINSTREL_VHT_STREAM_GROUPS 6 /* BW(=3) * SGI(=2) */
-#else
-#define MINSTREL_VHT_STREAM_GROUPS 0
-#endif
#define MINSTREL_HT_GROUPS_NB (MINSTREL_MAX_STREAMS * \
MINSTREL_HT_STREAM_GROUPS)
@@ -34,16 +30,13 @@
#define MINSTREL_CCK_GROUP (MINSTREL_HT_GROUP_0 + MINSTREL_HT_GROUPS_NB)
#define MINSTREL_VHT_GROUP_0 (MINSTREL_CCK_GROUP + 1)
-#ifdef CONFIG_MAC80211_RC_MINSTREL_VHT
#define MCS_GROUP_RATES 10
-#else
-#define MCS_GROUP_RATES 8
-#endif
struct mcs_group {
- u32 flags;
- unsigned int streams;
- unsigned int duration[MCS_GROUP_RATES];
+ u16 flags;
+ u8 streams;
+ u8 shift;
+ u16 duration[MCS_GROUP_RATES];
};
extern const struct mcs_group minstrel_mcs_groups[];
@@ -110,17 +103,12 @@ struct minstrel_ht_sta_priv {
struct minstrel_ht_sta ht;
struct minstrel_sta_info legacy;
};
-#ifdef CONFIG_MAC80211_DEBUGFS
- struct dentry *dbg_stats;
- struct dentry *dbg_stats_csv;
-#endif
void *ratelist;
void *sample_table;
bool is_ht;
};
void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
-void minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta);
int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
int prob_ewma);
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index bfcc03152dc6..57820a5f2c16 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -15,6 +15,22 @@
#include "rc80211_minstrel.h"
#include "rc80211_minstrel_ht.h"
+static ssize_t
+minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos)
+{
+ struct minstrel_debugfs_info *ms;
+
+ ms = file->private_data;
+ return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len);
+}
+
+static int
+minstrel_stats_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
static char *
minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
{
@@ -41,7 +57,7 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j];
static const int bitrates[4] = { 10, 20, 55, 110 };
int idx = i * MCS_GROUP_RATES + j;
- unsigned int prob_ewmsd;
+ unsigned int duration;
if (!(mi->supported[i] & BIT(j)))
continue;
@@ -79,21 +95,21 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
p += sprintf(p, " %3u ", idx);
/* tx_time[rate(i)] in usec */
- tx_time = DIV_ROUND_CLOSEST(mg->duration[j], 1000);
+ duration = mg->duration[j];
+ duration <<= mg->shift;
+ tx_time = DIV_ROUND_CLOSEST(duration, 1000);
p += sprintf(p, "%6u ", tx_time);
tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
- prob_ewmsd = minstrel_get_ewmsd10(mrs);
- p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u"
+ p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u"
" %3u %3u %-3u "
"%9llu %-9llu\n",
tp_max / 10, tp_max % 10,
tp_avg / 10, tp_avg % 10,
eprob / 10, eprob % 10,
- prob_ewmsd / 10, prob_ewmsd % 10,
mrs->retry_count,
mrs->last_success,
mrs->last_attempts,
@@ -130,9 +146,9 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file)
p += sprintf(p, "\n");
p += sprintf(p,
- " best ____________rate__________ ________statistics________ _____last____ ______sum-of________\n");
+ " best ____________rate__________ ____statistics___ _____last____ ______sum-of________\n");
p += sprintf(p,
- "mode guard # rate [name idx airtime max_tp] [avg(tp) avg(prob) sd(prob)] [retry|suc|att] [#success | #attempts]\n");
+ "mode guard # rate [name idx airtime max_tp] [avg(tp) avg(prob)] [retry|suc|att] [#success | #attempts]\n");
p = minstrel_ht_stats_dump(mi, MINSTREL_CCK_GROUP, p);
for (i = 0; i < MINSTREL_CCK_GROUP; i++)
@@ -187,7 +203,7 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p)
struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j];
static const int bitrates[4] = { 10, 20, 55, 110 };
int idx = i * MCS_GROUP_RATES + j;
- unsigned int prob_ewmsd;
+ unsigned int duration;
if (!(mi->supported[i] & BIT(j)))
continue;
@@ -222,20 +238,21 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p)
}
p += sprintf(p, "%u,", idx);
- tx_time = DIV_ROUND_CLOSEST(mg->duration[j], 1000);
+
+ duration = mg->duration[j];
+ duration <<= mg->shift;
+ tx_time = DIV_ROUND_CLOSEST(duration, 1000);
p += sprintf(p, "%u,", tx_time);
tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100));
tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma);
eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
- prob_ewmsd = minstrel_get_ewmsd10(mrs);
- p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u.%u,%u,%u,"
+ p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,"
"%u,%llu,%llu,",
tp_max / 10, tp_max % 10,
tp_avg / 10, tp_avg % 10,
eprob / 10, eprob % 10,
- prob_ewmsd / 10, prob_ewmsd % 10,
mrs->retry_count,
mrs->last_success,
mrs->last_attempts,
@@ -303,17 +320,8 @@ minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
{
struct minstrel_ht_sta_priv *msp = priv_sta;
- msp->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, msp,
- &minstrel_ht_stat_fops);
- msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, msp,
- &minstrel_ht_stat_csv_fops);
-}
-
-void
-minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta)
-{
- struct minstrel_ht_sta_priv *msp = priv_sta;
-
- debugfs_remove(msp->dbg_stats);
- debugfs_remove(msp->dbg_stats_csv);
+ debugfs_create_file("rc_stats", 0444, dir, msp,
+ &minstrel_ht_stat_fops);
+ debugfs_create_file("rc_stats_csv", 0444, dir, msp,
+ &minstrel_ht_stat_csv_fops);
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 96611d5dfadb..3bd3b5769797 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -115,7 +115,8 @@ static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len,
if (status->flag & (RX_FLAG_FAILED_FCS_CRC |
RX_FLAG_FAILED_PLCP_CRC |
- RX_FLAG_ONLY_MONITOR))
+ RX_FLAG_ONLY_MONITOR |
+ RX_FLAG_NO_PSDU))
return true;
if (unlikely(skb->len < 16 + present_fcs_len + rtap_space))
@@ -189,6 +190,15 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he_mu) != 12);
}
+ if (status->flag & RX_FLAG_NO_PSDU)
+ len += 1;
+
+ if (status->flag & RX_FLAG_RADIOTAP_LSIG) {
+ len = ALIGN(len, 2);
+ len += 4;
+ BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_lsig) != 4);
+ }
+
if (status->chains) {
/* antenna and antenna signal fields */
len += 2 * hweight8(status->chains);
@@ -279,6 +289,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
struct ieee80211_vendor_radiotap rtap = {};
struct ieee80211_radiotap_he he = {};
struct ieee80211_radiotap_he_mu he_mu = {};
+ struct ieee80211_radiotap_lsig lsig = {};
if (status->flag & RX_FLAG_RADIOTAP_HE) {
he = *(struct ieee80211_radiotap_he *)skb->data;
@@ -291,6 +302,11 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
skb_pull(skb, sizeof(he_mu));
}
+ if (status->flag & RX_FLAG_RADIOTAP_LSIG) {
+ lsig = *(struct ieee80211_radiotap_lsig *)skb->data;
+ skb_pull(skb, sizeof(lsig));
+ }
+
if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
rtap = *(struct ieee80211_vendor_radiotap *)skb->data;
/* rtap.len and rtap.pad are undone immediately */
@@ -549,7 +565,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
if (status->encoding == RX_ENC_HE &&
status->flag & RX_FLAG_RADIOTAP_HE) {
-#define HE_PREP(f, val) cpu_to_le16(FIELD_PREP(IEEE80211_RADIOTAP_HE_##f, val))
+#define HE_PREP(f, val) le16_encode_bits(val, IEEE80211_RADIOTAP_HE_##f)
if (status->enc_flags & RX_ENC_FLAG_STBC_MASK) {
he.data6 |= HE_PREP(DATA6_NSTS,
@@ -630,6 +646,21 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
pos += sizeof(he_mu);
}
+ if (status->flag & RX_FLAG_NO_PSDU) {
+ rthdr->it_present |=
+ cpu_to_le32(1 << IEEE80211_RADIOTAP_ZERO_LEN_PSDU);
+ *pos++ = status->zero_length_psdu_type;
+ }
+
+ if (status->flag & RX_FLAG_RADIOTAP_LSIG) {
+ /* ensure 2 byte alignment */
+ while ((pos - (u8 *)rthdr) & 1)
+ pos++;
+ rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_LSIG);
+ memcpy(pos, &lsig, sizeof(lsig));
+ pos += sizeof(lsig);
+ }
+
for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
*pos++ = status->chain_signal[chain];
*pos++ = chain;
@@ -1505,7 +1536,7 @@ static void sta_ps_start(struct sta_info *sta)
if (!sta->sta.txq[0])
return;
- for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
+ for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) {
if (txq_has_queue(sta->sta.txq[tid]))
set_bit(tid, &sta->txq_buffered_tids);
else
@@ -2046,6 +2077,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
idx = sdata->fragment_next;
for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
struct ieee80211_hdr *f_hdr;
+ struct sk_buff *f_skb;
idx--;
if (idx < 0)
@@ -2057,7 +2089,8 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
entry->last_frag + 1 != frag)
continue;
- f_hdr = (struct ieee80211_hdr *)entry->skb_list.next->data;
+ f_skb = __skb_peek(&entry->skb_list);
+ f_hdr = (struct ieee80211_hdr *) f_skb->data;
/*
* Check ftype and addresses are equal, else check next fragment
@@ -2314,7 +2347,7 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx, bool *port_control)
if (!sdata->u.mgd.use_4addr)
return -1;
- else
+ else if (!ether_addr_equal(hdr->addr1, sdata->vif.addr))
check_port_control = true;
}
@@ -2425,8 +2458,9 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
if (!xmit_skb)
net_info_ratelimited("%s: failed to clone multicast frame\n",
dev->name);
- } else if (!is_multicast_ether_addr(ehdr->h_dest)) {
- dsta = sta_info_get(sdata, skb->data);
+ } else if (!is_multicast_ether_addr(ehdr->h_dest) &&
+ !ether_addr_equal(ehdr->h_dest, ehdr->h_source)) {
+ dsta = sta_info_get(sdata, ehdr->h_dest);
if (dsta) {
/*
* The destination station is associated to
@@ -4207,11 +4241,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
if (fast_rx->internal_forward) {
struct sk_buff *xmit_skb = NULL;
- bool multicast = is_multicast_ether_addr(skb->data);
-
- if (multicast) {
+ if (is_multicast_ether_addr(addrs.da)) {
xmit_skb = skb_copy(skb, GFP_ATOMIC);
- } else if (sta_info_get(rx->sdata, skb->data)) {
+ } else if (!ether_addr_equal(addrs.da, addrs.sa) &&
+ sta_info_get(rx->sdata, addrs.da)) {
xmit_skb = skb;
skb = NULL;
}
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 029334835747..4e4902bdbef8 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -144,6 +144,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
wide_bw_chansw_ie->new_center_freq_seg1,
/* .basic_mcs_set doesn't matter */
};
+ struct ieee80211_ht_operation ht_oper = {};
/* default, for the case of IEEE80211_VHT_CHANWIDTH_USE_HT,
* to the previously parsed chandef
@@ -151,7 +152,9 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
new_vht_chandef = csa_ie->chandef;
/* ignore if parsing fails */
- if (!ieee80211_chandef_vht_oper(&vht_oper, &new_vht_chandef))
+ if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
+ &vht_oper, &ht_oper,
+ &new_vht_chandef))
new_vht_chandef.chan = NULL;
if (sta_flags & IEEE80211_STA_DISABLE_80P80MHZ &&
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f34202242d24..fb8c2252ac0e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -113,7 +113,12 @@ static void __cleanup_single_sta(struct sta_info *sta)
if (sta->sta.txq[0]) {
for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
- struct txq_info *txqi = to_txq_info(sta->sta.txq[i]);
+ struct txq_info *txqi;
+
+ if (!sta->sta.txq[i])
+ continue;
+
+ txqi = to_txq_info(sta->sta.txq[i]);
spin_lock_bh(&fq->lock);
ieee80211_txq_purge(local, txqi);
@@ -374,6 +379,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
struct txq_info *txq = txq_data + i * size;
+ /* might not do anything for the bufferable MMPDU TXQ */
ieee80211_txq_init(sdata, sta, txq, i);
}
}
@@ -1239,13 +1245,11 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
if (!ieee80211_hw_check(&local->hw, AP_LINK_PS))
drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta);
- if (sta->sta.txq[0]) {
- for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
- if (!txq_has_queue(sta->sta.txq[i]))
- continue;
+ for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
+ if (!sta->sta.txq[i] || !txq_has_queue(sta->sta.txq[i]))
+ continue;
- drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i]));
- }
+ drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i]));
}
skb_queue_head_init(&pending);
@@ -1683,7 +1687,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
return;
for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
- if (!(driver_release_tids & BIT(tid)) ||
+ if (!sta->sta.txq[tid] ||
+ !(driver_release_tids & BIT(tid)) ||
txq_has_queue(sta->sta.txq[tid]))
continue;
@@ -2323,13 +2328,13 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL);
}
- if (ieee80211_hw_check(&sta->local->hw, REPORTS_TX_ACK_STATUS) &&
- !(sinfo->filled & BIT_ULL(NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG))) {
+ if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG)) &&
+ sta->status_stats.ack_signal_filled) {
sinfo->avg_ack_signal =
-(s8)ewma_avg_signal_read(
&sta->status_stats.avg_ack_signal);
sinfo->filled |=
- BIT_ULL(NL80211_STA_INFO_DATA_ACK_SIGNAL_AVG);
+ BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG);
}
}
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 91d7c0cd1882..aa4afbf0abaf 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -987,6 +987,25 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_tx_status_ext);
+void ieee80211_tx_rate_update(struct ieee80211_hw *hw,
+ struct ieee80211_sta *pubsta,
+ struct ieee80211_tx_info *info)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_supported_band *sband = hw->wiphy->bands[info->band];
+ struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
+ struct ieee80211_tx_status status = {
+ .info = info,
+ .sta = pubsta,
+ };
+
+ rate_control_tx_status(local, sband, &status);
+
+ if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL))
+ sta->tx_stats.last_rate = info->status.rates[0];
+}
+EXPORT_SYMBOL(ieee80211_tx_rate_update);
+
void ieee80211_report_low_ack(struct ieee80211_sta *pubsta, u32 num_packets)
{
struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 0ab69a1964f8..588c51a67c89 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2600,6 +2600,29 @@ TRACE_EVENT(drv_wake_tx_queue,
)
);
+TRACE_EVENT(drv_get_ftm_responder_stats,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_ftm_responder_stats *ftm_stats),
+
+ TP_ARGS(local, sdata, ftm_stats),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG
+ )
+);
+
#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 25ba24bef8f5..e0ccee23fbcd 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1253,10 +1253,18 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
(info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
return NULL;
- if (!ieee80211_is_data_present(hdr->frame_control))
- return NULL;
-
- if (sta) {
+ if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) {
+ if ((!ieee80211_is_mgmt(hdr->frame_control) ||
+ ieee80211_is_bufferable_mmpdu(hdr->frame_control) ||
+ vif->type == NL80211_IFTYPE_STATION) &&
+ sta && sta->uploaded) {
+ /*
+ * This will be NULL if the driver didn't set the
+ * opt-in hardware flag.
+ */
+ txq = sta->sta.txq[IEEE80211_NUM_TIDS];
+ }
+ } else if (sta) {
u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
if (!sta->uploaded)
@@ -1444,16 +1452,33 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
txqi->txq.vif = &sdata->vif;
- if (sta) {
- txqi->txq.sta = &sta->sta;
- sta->sta.txq[tid] = &txqi->txq;
- txqi->txq.tid = tid;
- txqi->txq.ac = ieee80211_ac_from_tid(tid);
- } else {
+ if (!sta) {
sdata->vif.txq = &txqi->txq;
txqi->txq.tid = 0;
txqi->txq.ac = IEEE80211_AC_BE;
+
+ return;
+ }
+
+ if (tid == IEEE80211_NUM_TIDS) {
+ if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+ /* Drivers need to opt in to the management MPDU TXQ */
+ if (!ieee80211_hw_check(&sdata->local->hw,
+ STA_MMPDU_TXQ))
+ return;
+ } else if (!ieee80211_hw_check(&sdata->local->hw,
+ BUFF_MMPDU_TXQ)) {
+ /* Drivers need to opt in to the bufferable MMPDU TXQ */
+ return;
+ }
+ txqi->txq.ac = IEEE80211_AC_VO;
+ } else {
+ txqi->txq.ac = ieee80211_ac_from_tid(tid);
}
+
+ txqi->txq.sta = &sta->sta;
+ txqi->txq.tid = tid;
+ sta->sta.txq[tid] = &txqi->txq;
}
void ieee80211_txq_purge(struct ieee80211_local *local,
@@ -2955,6 +2980,10 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
goto out;
+ /* Key is being removed */
+ if (build.key->flags & KEY_FLAG_TAINTED)
+ goto out;
+
switch (build.key->conf.cipher) {
case WLAN_CIPHER_SUITE_CCMP:
case WLAN_CIPHER_SUITE_CCMP_256:
@@ -3200,6 +3229,10 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
max_amsdu_len = min_t(int, max_amsdu_len,
sta->sta.max_rc_amsdu_len);
+ if (sta->sta.max_tid_amsdu_len[tid])
+ max_amsdu_len = min_t(int, max_amsdu_len,
+ sta->sta.max_tid_amsdu_len[tid]);
+
spin_lock_bh(&fq->lock);
/* TODO: Ideally aggregation should be done on dequeue to remain
@@ -3232,6 +3265,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
if (max_frags && nfrags > max_frags)
goto out;
+ if (!drv_can_aggregate_in_amsdu(local, head, skb))
+ goto out;
+
if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
goto out;
@@ -3476,13 +3512,19 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info;
struct ieee80211_tx_data tx;
ieee80211_tx_result r;
- struct ieee80211_vif *vif;
+ struct ieee80211_vif *vif = txq->vif;
spin_lock_bh(&fq->lock);
- if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
+ if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ||
+ test_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags))
goto out;
+ if (vif->txqs_stopped[ieee80211_ac_from_tid(txq->tid)]) {
+ set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags);
+ goto out;
+ }
+
/* Make sure fragments stay together. */
skb = __skb_dequeue(&txqi->frags);
if (skb)
@@ -3577,6 +3619,7 @@ begin:
}
IEEE80211_SKB_CB(skb)->control.vif = vif;
+
out:
spin_unlock_bh(&fq->lock);
@@ -3605,13 +3648,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
if (!IS_ERR_OR_NULL(sta)) {
struct ieee80211_fast_tx *fast_tx;
- /* We need a bit of data queued to build aggregates properly, so
- * instruct the TCP stack to allow more than a single ms of data
- * to be queued in the stack. The value is a bit-shift of 1
- * second, so 8 is ~4ms of queued data. Only affects local TCP
- * sockets.
- */
- sk_pacing_shift_update(skb->sk, 8);
+ sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift);
fast_tx = rcu_dereference(sta->fast_tx);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 716cd6442d86..bec424316ea4 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -240,6 +240,102 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_ctstoself_duration);
+static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_vif *vif = &sdata->vif;
+ struct fq *fq = &local->fq;
+ struct ps_data *ps = NULL;
+ struct txq_info *txqi;
+ struct sta_info *sta;
+ int i;
+
+ spin_lock_bh(&fq->lock);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ ps = &sdata->bss->ps;
+
+ sdata->vif.txqs_stopped[ac] = false;
+
+ list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ if (sdata != sta->sdata)
+ continue;
+
+ for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
+ struct ieee80211_txq *txq = sta->sta.txq[i];
+
+ if (!txq)
+ continue;
+
+ txqi = to_txq_info(txq);
+
+ if (ac != txq->ac)
+ continue;
+
+ if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX,
+ &txqi->flags))
+ continue;
+
+ spin_unlock_bh(&fq->lock);
+ drv_wake_tx_queue(local, txqi);
+ spin_lock_bh(&fq->lock);
+ }
+ }
+
+ if (!vif->txq)
+ goto out;
+
+ txqi = to_txq_info(vif->txq);
+
+ if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags) ||
+ (ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac)
+ goto out;
+
+ spin_unlock_bh(&fq->lock);
+
+ drv_wake_tx_queue(local, txqi);
+ return;
+out:
+ spin_unlock_bh(&fq->lock);
+}
+
+void ieee80211_wake_txqs(unsigned long data)
+{
+ struct ieee80211_local *local = (struct ieee80211_local *)data;
+ struct ieee80211_sub_if_data *sdata;
+ int n_acs = IEEE80211_NUM_ACS;
+ unsigned long flags;
+ int i;
+
+ rcu_read_lock();
+ spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+
+ if (local->hw.queues < IEEE80211_NUM_ACS)
+ n_acs = 1;
+
+ for (i = 0; i < local->hw.queues; i++) {
+ if (local->queue_stop_reasons[i])
+ continue;
+
+ spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ int ac;
+
+ for (ac = 0; ac < n_acs; ac++) {
+ int ac_queue = sdata->vif.hw_queue[ac];
+
+ if (ac_queue == i ||
+ sdata->vif.cab_queue == i)
+ __ieee80211_wake_txqs(sdata, ac);
+ }
+ }
+ spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+ }
+
+ spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+ rcu_read_unlock();
+}
+
void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
{
struct ieee80211_sub_if_data *sdata;
@@ -308,6 +404,9 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
rcu_read_unlock();
} else
tasklet_schedule(&local->tx_pending_tasklet);
+
+ if (local->ops->wake_tx_queue)
+ tasklet_schedule(&local->wake_txqs_tasklet);
}
void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
@@ -351,9 +450,6 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
if (__test_and_set_bit(reason, &local->queue_stop_reasons[queue]))
return;
- if (local->ops->wake_tx_queue)
- return;
-
if (local->hw.queues < IEEE80211_NUM_ACS)
n_acs = 1;
@@ -366,8 +462,15 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
for (ac = 0; ac < n_acs; ac++) {
if (sdata->vif.hw_queue[ac] == queue ||
- sdata->vif.cab_queue == queue)
- netif_stop_subqueue(sdata->dev, ac);
+ sdata->vif.cab_queue == queue) {
+ if (!local->ops->wake_tx_queue) {
+ netif_stop_subqueue(sdata->dev, ac);
+ continue;
+ }
+ spin_lock(&local->fq.lock);
+ sdata->vif.txqs_stopped[ac] = true;
+ spin_unlock(&local->fq.lock);
+ }
}
}
rcu_read_unlock();
@@ -2075,6 +2178,11 @@ int ieee80211_reconfig(struct ieee80211_local *local)
case NL80211_IFTYPE_AP:
changed |= BSS_CHANGED_SSID | BSS_CHANGED_P2P_PS;
+ if (sdata->vif.bss_conf.ftm_responder == 1 &&
+ wiphy_ext_feature_isset(sdata->local->hw.wiphy,
+ NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER))
+ changed |= BSS_CHANGED_FTM_RESPONDER;
+
if (sdata->vif.type == NL80211_IFTYPE_AP) {
changed |= BSS_CHANGED_AP_PROBE_RESP;
@@ -2657,49 +2765,65 @@ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
return true;
}
-bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
+bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
+ const struct ieee80211_vht_operation *oper,
+ const struct ieee80211_ht_operation *htop,
struct cfg80211_chan_def *chandef)
{
struct cfg80211_chan_def new = *chandef;
- int cf1, cf2;
+ int cf0, cf1;
+ int ccfs0, ccfs1, ccfs2;
+ int ccf0, ccf1;
- if (!oper)
+ if (!oper || !htop)
return false;
- cf1 = ieee80211_channel_to_frequency(oper->center_freq_seg0_idx,
- chandef->chan->band);
- cf2 = ieee80211_channel_to_frequency(oper->center_freq_seg1_idx,
- chandef->chan->band);
+ ccfs0 = oper->center_freq_seg0_idx;
+ ccfs1 = oper->center_freq_seg1_idx;
+ ccfs2 = (le16_to_cpu(htop->operation_mode) &
+ IEEE80211_HT_OP_MODE_CCFS2_MASK)
+ >> IEEE80211_HT_OP_MODE_CCFS2_SHIFT;
+
+ /* when parsing (and we know how to) CCFS1 and CCFS2 are equivalent */
+ ccf0 = ccfs0;
+ ccf1 = ccfs1;
+ if (!ccfs1 && ieee80211_hw_check(hw, SUPPORTS_VHT_EXT_NSS_BW))
+ ccf1 = ccfs2;
+
+ cf0 = ieee80211_channel_to_frequency(ccf0, chandef->chan->band);
+ cf1 = ieee80211_channel_to_frequency(ccf1, chandef->chan->band);
switch (oper->chan_width) {
case IEEE80211_VHT_CHANWIDTH_USE_HT:
+ /* just use HT information directly */
break;
case IEEE80211_VHT_CHANWIDTH_80MHZ:
new.width = NL80211_CHAN_WIDTH_80;
- new.center_freq1 = cf1;
+ new.center_freq1 = cf0;
/* If needed, adjust based on the newer interop workaround. */
- if (oper->center_freq_seg1_idx) {
+ if (ccf1) {
unsigned int diff;
- diff = abs(oper->center_freq_seg1_idx -
- oper->center_freq_seg0_idx);
+ diff = abs(ccf1 - ccf0);
if (diff == 8) {
new.width = NL80211_CHAN_WIDTH_160;
- new.center_freq1 = cf2;
+ new.center_freq1 = cf1;
} else if (diff > 8) {
new.width = NL80211_CHAN_WIDTH_80P80;
- new.center_freq2 = cf2;
+ new.center_freq2 = cf1;
}
}
break;
case IEEE80211_VHT_CHANWIDTH_160MHZ:
+ /* deprecated encoding */
new.width = NL80211_CHAN_WIDTH_160;
- new.center_freq1 = cf1;
+ new.center_freq1 = cf0;
break;
case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
+ /* deprecated encoding */
new.width = NL80211_CHAN_WIDTH_80P80;
- new.center_freq1 = cf1;
- new.center_freq2 = cf2;
+ new.center_freq1 = cf0;
+ new.center_freq2 = cf1;
break;
default:
return false;
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 259325cbcc31..006d82e4a397 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -3,6 +3,7 @@
*
* Portions of this file
* Copyright(c) 2015 - 2016 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -231,6 +232,13 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs,
sizeof(struct ieee80211_vht_mcs_info));
+ /* copy EXT_NSS_BW Support value or remove the capability */
+ if (ieee80211_hw_check(&sdata->local->hw, SUPPORTS_VHT_EXT_NSS_BW))
+ vht_cap->cap |= (cap_info & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK);
+ else
+ vht_cap->vht_mcs.tx_highest &=
+ ~cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE);
+
/* but also restrict MCSes */
for (i = 0; i < 8; i++) {
u16 own_rx, own_tx, peer_rx, peer_tx;
@@ -294,6 +302,18 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
break;
default:
sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+
+ if (!(vht_cap->vht_mcs.tx_highest &
+ cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE)))
+ break;
+
+ /*
+ * If this is non-zero, then it does support 160 MHz after all,
+ * in one form or the other. We don't distinguish here (or even
+ * above) between 160 and 80+80 yet.
+ */
+ if (cap_info & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)
+ sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
}
sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 8fbe6cdbe255..5fe274c47c41 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1223,7 +1223,7 @@ static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
int err;
err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
- devconf_mpls_policy, NULL);
+ devconf_mpls_policy, extack);
if (err < 0)
goto errout;
@@ -1263,6 +1263,7 @@ errout:
static int mpls_netconf_dump_devconf(struct sk_buff *skb,
struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct hlist_head *head;
struct net_device *dev;
@@ -1270,6 +1271,21 @@ static int mpls_netconf_dump_devconf(struct sk_buff *skb,
int idx, s_idx;
int h, s_h;
+ if (cb->strict_check) {
+ struct netlink_ext_ack *extack = cb->extack;
+ struct netconfmsg *ncm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request");
+ return -EINVAL;
+ }
+ }
+
s_h = cb->args[0];
s_idx = idx = cb->args[1];
@@ -1286,7 +1302,7 @@ static int mpls_netconf_dump_devconf(struct sk_buff *skb,
goto cont;
if (mpls_netconf_fill_devconf(skb, mdev,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
+ nlh->nlmsg_seq,
RTM_NEWNETCONF,
NLM_F_MULTI,
NETCONFA_ALL) < 0) {
@@ -2015,8 +2031,43 @@ nla_put_failure:
return -EMSGSIZE;
}
+#if IS_ENABLED(CONFIG_INET)
+static int mpls_valid_fib_dump_req(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ return ip_valid_fib_dump_req(nlh, extack);
+}
+#else
+static int mpls_valid_fib_dump_req(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct rtmsg *rtm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid header for FIB dump request");
+ return -EINVAL;
+ }
+
+ rtm = nlmsg_data(nlh);
+ if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
+ rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
+ rtm->rtm_type || rtm->rtm_flags) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for FIB dump request");
+ return -EINVAL;
+ }
+
+ if (nlmsg_attrlen(nlh, sizeof(*rtm))) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in FIB dump request");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif
+
static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
{
+ const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
struct mpls_route __rcu **platform_label;
size_t platform_labels;
@@ -2024,6 +2075,13 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
ASSERT_RTNL();
+ if (cb->strict_check) {
+ int err = mpls_valid_fib_dump_req(nlh, cb->extack);
+
+ if (err < 0)
+ return err;
+ }
+
index = cb->args[0];
if (index < MPLS_LABEL_FIRST_UNRESERVED)
index = MPLS_LABEL_FIRST_UNRESERVED;
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 8055e3965cef..3d0a33b874f5 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -68,6 +68,10 @@ enum {
NCSI_MODE_MAX
};
+/* OEM vendor manufacturer IDs */
+#define NCSI_OEM_MFR_MLX_ID 0x8119
+#define NCSI_OEM_MFR_BCM_ID 0x113d
+
struct ncsi_channel_version {
u32 version; /* Supported BCD encoded NCSI version */
u32 alpha2; /* Supported BCD encoded NCSI version */
@@ -305,6 +309,7 @@ struct ncsi_cmd_arg {
unsigned short words[8];
unsigned int dwords[4];
};
+ unsigned char *data; /* NCSI OEM data */
};
extern struct list_head ncsi_dev_list;
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index 7567ca63aae2..82b7d9201db8 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -211,6 +211,25 @@ static int ncsi_cmd_handler_snfc(struct sk_buff *skb,
return 0;
}
+static int ncsi_cmd_handler_oem(struct sk_buff *skb,
+ struct ncsi_cmd_arg *nca)
+{
+ struct ncsi_cmd_oem_pkt *cmd;
+ unsigned int len;
+
+ len = sizeof(struct ncsi_cmd_pkt_hdr) + 4;
+ if (nca->payload < 26)
+ len += 26;
+ else
+ len += nca->payload;
+
+ cmd = skb_put_zero(skb, len);
+ memcpy(&cmd->mfr_id, nca->data, nca->payload);
+ ncsi_cmd_build_header(&cmd->cmd.common, nca);
+
+ return 0;
+}
+
static struct ncsi_cmd_handler {
unsigned char type;
int payload;
@@ -244,7 +263,7 @@ static struct ncsi_cmd_handler {
{ NCSI_PKT_CMD_GNS, 0, ncsi_cmd_handler_default },
{ NCSI_PKT_CMD_GNPTS, 0, ncsi_cmd_handler_default },
{ NCSI_PKT_CMD_GPS, 0, ncsi_cmd_handler_default },
- { NCSI_PKT_CMD_OEM, 0, NULL },
+ { NCSI_PKT_CMD_OEM, -1, ncsi_cmd_handler_oem },
{ NCSI_PKT_CMD_PLDM, 0, NULL },
{ NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default }
};
@@ -316,8 +335,13 @@ int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca)
return -ENOENT;
}
- /* Get packet payload length and allocate the request */
- nca->payload = nch->payload;
+ /* Get packet payload length and allocate the request.
+ * A negative length in the handler structure means the
+ * caller has already set the length in nca before calling
+ * the xmit function, so it must not be overwritten here.
+ */
+ if (nch->payload >= 0)
+ nca->payload = nch->payload;
nr = ncsi_alloc_command(nca);
if (!nr)
return -ENOMEM;
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 45f33d6dedf7..32cb7751d216 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -12,7 +12,6 @@
#include <linux/if_arp.h>
#include <linux/rtnetlink.h>
#include <linux/etherdevice.h>
-#include <linux/module.h>
#include <net/genetlink.h>
#include <net/ncsi.h>
#include <linux/skbuff.h>
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 91b4b66438df..0f2087c8d42a 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -151,6 +151,20 @@ struct ncsi_cmd_snfc_pkt {
unsigned char pad[22];
};
+/* OEM Request Command as per NCSI Specification */
+struct ncsi_cmd_oem_pkt {
+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */
+ __be32 mfr_id; /* Manufacture ID */
+ unsigned char data[]; /* OEM Payload Data */
+};
+
+/* OEM Response Packet as per NCSI Specification */
+struct ncsi_rsp_oem_pkt {
+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */
+ __be32 mfr_id; /* Manufacture ID */
+ unsigned char data[]; /* Payload data */
+};
+
/* Get Link Status */
struct ncsi_rsp_gls_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 930c1d3796f0..d66b34749027 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -596,6 +596,47 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
return 0;
}
+static struct ncsi_rsp_oem_handler {
+ unsigned int mfr_id;
+ int (*handler)(struct ncsi_request *nr);
+} ncsi_rsp_oem_handlers[] = {
+ { NCSI_OEM_MFR_MLX_ID, NULL },
+ { NCSI_OEM_MFR_BCM_ID, NULL }
+};
+
+/* Response handler for OEM command */
+static int ncsi_rsp_handler_oem(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_oem_pkt *rsp;
+ struct ncsi_rsp_oem_handler *nrh = NULL;
+ unsigned int mfr_id, i;
+
+ /* Get the response header */
+ rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+ mfr_id = ntohl(rsp->mfr_id);
+
+ /* Check for manufacturer id and Find the handler */
+ for (i = 0; i < ARRAY_SIZE(ncsi_rsp_oem_handlers); i++) {
+ if (ncsi_rsp_oem_handlers[i].mfr_id == mfr_id) {
+ if (ncsi_rsp_oem_handlers[i].handler)
+ nrh = &ncsi_rsp_oem_handlers[i];
+ else
+ nrh = NULL;
+
+ break;
+ }
+ }
+
+ if (!nrh) {
+ netdev_err(nr->ndp->ndev.dev, "Received unrecognized OEM packet with MFR-ID (0x%x)\n",
+ mfr_id);
+ return -ENOENT;
+ }
+
+ /* Process the packet */
+ return nrh->handler(nr);
+}
+
static int ncsi_rsp_handler_gvi(struct ncsi_request *nr)
{
struct ncsi_rsp_gvi_pkt *rsp;
@@ -932,7 +973,7 @@ static struct ncsi_rsp_handler {
{ NCSI_PKT_RSP_GNS, 172, ncsi_rsp_handler_gns },
{ NCSI_PKT_RSP_GNPTS, 172, ncsi_rsp_handler_gnpts },
{ NCSI_PKT_RSP_GPS, 8, ncsi_rsp_handler_gps },
- { NCSI_PKT_RSP_OEM, 0, NULL },
+ { NCSI_PKT_RSP_OEM, -1, ncsi_rsp_handler_oem },
{ NCSI_PKT_RSP_PLDM, 0, NULL },
{ NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid }
};
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f61c306de1d0..2ab870ef233a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -625,6 +625,13 @@ config NFT_FIB_INET
The lookup will be delegated to the IPv4 or IPv6 FIB depending
on the protocol of the packet.
+config NFT_XFRM
+ tristate "Netfilter nf_tables xfrm/IPSec security association matching"
+ depends on XFRM
+ help
+ This option adds an expression that you can use to extract properties
+ of a packet's security association.
+
config NFT_SOCKET
tristate "Netfilter nf_tables socket match support"
depends on IPV6 || IPV6=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 16895e045b66..4ddf3ef51ece 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -113,6 +113,7 @@ obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
obj-$(CONFIG_NFT_SOCKET) += nft_socket.o
obj-$(CONFIG_NFT_OSF) += nft_osf.o
obj-$(CONFIG_NFT_TPROXY) += nft_tproxy.o
+obj-$(CONFIG_NFT_XFRM) += nft_xfrm.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 7ca926a03b81..fe9abf3cc10a 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1686,8 +1686,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
skb_reset_network_header(skb);
IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
- ipv4_update_pmtu(skb, ipvs->net,
- mtu, 0, 0, 0, 0);
+ ipv4_update_pmtu(skb, ipvs->net, mtu, 0, 0);
/* Client uses PMTUD? */
if (!(frag_off & htons(IP_DF)))
goto ignore_ipip;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 62eefea48973..83395bf6dc35 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3234,7 +3234,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
/* Try to find the service for which to dump destinations */
if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX,
- ip_vs_cmd_policy, NULL))
+ ip_vs_cmd_policy, cb->extack))
goto out_err;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index a676d5f76bdc..ca1168d67fac 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -379,7 +379,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
return false;
}
- l4proto = __nf_ct_l4proto_find(l3num, protonum);
+ l4proto = __nf_ct_l4proto_find(protonum);
ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
l4proto);
@@ -539,7 +539,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_ct_tmpl_free(ct);
return;
}
- l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
if (l4proto->destroy)
l4proto->destroy(ct);
@@ -840,7 +840,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
enum ip_conntrack_info oldinfo;
struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
- l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
if (l4proto->allow_clash &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
@@ -1109,7 +1109,7 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
return true;
- l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
return true;
@@ -1370,12 +1370,6 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
- if (!l4proto->new(ct, skb, dataoff)) {
- nf_conntrack_free(ct);
- pr_debug("can't track with proto module\n");
- return NULL;
- }
-
if (timeout_ext)
nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
GFP_ATOMIC);
@@ -1436,12 +1430,12 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
/* On success, returns 0, sets skb->_nfct | ctinfo */
static int
-resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
+resolve_normal_ct(struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
- u_int16_t l3num,
u_int8_t protonum,
- const struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto,
+ const struct nf_hook_state *state)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
@@ -1452,17 +1446,18 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
- dataoff, l3num, protonum, net, &tuple, l4proto)) {
+ dataoff, state->pf, protonum, state->net,
+ &tuple, l4proto)) {
pr_debug("Can't get tuple\n");
return 0;
}
/* look for tuple match */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
- hash = hash_conntrack_raw(&tuple, net);
- h = __nf_conntrack_find_get(net, zone, &tuple, hash);
+ hash = hash_conntrack_raw(&tuple, state->net);
+ h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
if (!h) {
- h = init_conntrack(net, tmpl, &tuple, l4proto,
+ h = init_conntrack(state->net, tmpl, &tuple, l4proto,
skb, dataoff, hash);
if (!h)
return 0;
@@ -1491,13 +1486,45 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
return 0;
}
+/*
+ * icmp packets need special treatment to handle error messages that are
+ * related to a connection.
+ *
+ * Callers need to check if skb has a conntrack assigned when this
+ * helper returns; in such case skb belongs to an already known connection.
+ */
+static unsigned int __cold
+nf_conntrack_handle_icmp(struct nf_conn *tmpl,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ u8 protonum,
+ const struct nf_hook_state *state)
+{
+ int ret;
+
+ if (state->pf == NFPROTO_IPV4 && protonum == IPPROTO_ICMP)
+ ret = nf_conntrack_icmpv4_error(tmpl, skb, dataoff, state);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (state->pf == NFPROTO_IPV6 && protonum == IPPROTO_ICMPV6)
+ ret = nf_conntrack_icmpv6_error(tmpl, skb, dataoff, state);
+#endif
+ else
+ return NF_ACCEPT;
+
+ if (ret <= 0) {
+ NF_CT_STAT_INC_ATOMIC(state->net, error);
+ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+ }
+
+ return ret;
+}
+
unsigned int
-nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
- struct sk_buff *skb)
+nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
{
const struct nf_conntrack_l4proto *l4proto;
- struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct, *tmpl;
u_int8_t protonum;
int dataoff, ret;
@@ -1506,32 +1533,28 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
/* Previously seen (loopback or untracked)? Ignore. */
if ((tmpl && !nf_ct_is_template(tmpl)) ||
ctinfo == IP_CT_UNTRACKED) {
- NF_CT_STAT_INC_ATOMIC(net, ignore);
+ NF_CT_STAT_INC_ATOMIC(state->net, ignore);
return NF_ACCEPT;
}
skb->_nfct = 0;
}
/* rcu_read_lock()ed by nf_hook_thresh */
- dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
+ dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum);
if (dataoff <= 0) {
pr_debug("not prepared to track yet or error occurred\n");
- NF_CT_STAT_INC_ATOMIC(net, error);
- NF_CT_STAT_INC_ATOMIC(net, invalid);
+ NF_CT_STAT_INC_ATOMIC(state->net, error);
+ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
ret = NF_ACCEPT;
goto out;
}
- l4proto = __nf_ct_l4proto_find(pf, protonum);
+ l4proto = __nf_ct_l4proto_find(protonum);
- /* It may be an special packet, error, unclean...
- * inverse of the return code tells to the netfilter
- * core what to do with the packet. */
- if (l4proto->error != NULL) {
- ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum);
+ if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) {
+ ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff,
+ protonum, state);
if (ret <= 0) {
- NF_CT_STAT_INC_ATOMIC(net, error);
- NF_CT_STAT_INC_ATOMIC(net, invalid);
ret = -ret;
goto out;
}
@@ -1540,10 +1563,11 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
goto out;
}
repeat:
- ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
+ ret = resolve_normal_ct(tmpl, skb, dataoff,
+ protonum, l4proto, state);
if (ret < 0) {
/* Too stressed to deal. */
- NF_CT_STAT_INC_ATOMIC(net, drop);
+ NF_CT_STAT_INC_ATOMIC(state->net, drop);
ret = NF_DROP;
goto out;
}
@@ -1551,21 +1575,21 @@ repeat:
ct = nf_ct_get(skb, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
- NF_CT_STAT_INC_ATOMIC(net, invalid);
+ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
ret = NF_ACCEPT;
goto out;
}
- ret = l4proto->packet(ct, skb, dataoff, ctinfo);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo, state);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_conntrack_put(&ct->ct_general);
skb->_nfct = 0;
- NF_CT_STAT_INC_ATOMIC(net, invalid);
+ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
if (ret == -NF_DROP)
- NF_CT_STAT_INC_ATOMIC(net, drop);
+ NF_CT_STAT_INC_ATOMIC(state->net, drop);
/* Special case: TCP tracker reports an attempt to reopen a
* closed/aborted connection. We have to go back and create a
* fresh conntrack.
@@ -1594,8 +1618,7 @@ bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
rcu_read_lock();
ret = nf_ct_invert_tuple(inverse, orig,
- __nf_ct_l4proto_find(orig->src.l3num,
- orig->dst.protonum));
+ __nf_ct_l4proto_find(orig->dst.protonum));
rcu_read_unlock();
return ret;
}
@@ -1752,7 +1775,7 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
if (dataoff <= 0)
return -1;
- l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+ l4proto = nf_ct_l4proto_find_get(l4num);
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
l4num, net, &tuple, l4proto))
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 27b84231db10..3034038bfdf0 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -610,8 +610,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
expect->tuple.src.l3num,
expect->tuple.dst.protonum);
print_tuple(s, &expect->tuple,
- __nf_ct_l4proto_find(expect->tuple.src.l3num,
- expect->tuple.dst.protonum));
+ __nf_ct_l4proto_find(expect->tuple.dst.protonum));
if (expect->flags & NF_CT_EXPECT_PERMANENT) {
seq_puts(s, "PERMANENT");
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 036207ecaf16..4ae8e528943a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -135,8 +135,7 @@ static int ctnetlink_dump_tuples(struct sk_buff *skb,
ret = ctnetlink_dump_tuples_ip(skb, tuple);
if (ret >= 0) {
- l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
- tuple->dst.protonum);
+ l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto);
}
rcu_read_unlock();
@@ -184,7 +183,7 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
struct nlattr *nest_proto;
int ret;
- l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
if (!l4proto->to_nlattr)
return 0;
@@ -592,7 +591,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
len *= 3u; /* ORIG, REPLY, MASTER */
- l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
len += l4proto->nlattr_size;
if (l4proto->nlattr_tuple_size) {
len4 = l4proto->nlattr_tuple_size();
@@ -821,6 +820,7 @@ static int ctnetlink_done(struct netlink_callback *cb)
}
struct ctnetlink_filter {
+ u8 family;
struct {
u_int32_t val;
u_int32_t mask;
@@ -828,31 +828,39 @@ struct ctnetlink_filter {
};
static struct ctnetlink_filter *
-ctnetlink_alloc_filter(const struct nlattr * const cda[])
+ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
{
-#ifdef CONFIG_NF_CONNTRACK_MARK
struct ctnetlink_filter *filter;
+#ifndef CONFIG_NF_CONNTRACK_MARK
+ if (cda[CTA_MARK] && cda[CTA_MARK_MASK])
+ return ERR_PTR(-EOPNOTSUPP);
+#endif
+
filter = kzalloc(sizeof(*filter), GFP_KERNEL);
if (filter == NULL)
return ERR_PTR(-ENOMEM);
- filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
- filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+ filter->family = family;
- return filter;
-#else
- return ERR_PTR(-EOPNOTSUPP);
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
+ filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
+ filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+ }
#endif
+ return filter;
}
static int ctnetlink_start(struct netlink_callback *cb)
{
const struct nlattr * const *cda = cb->data;
struct ctnetlink_filter *filter = NULL;
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ u8 family = nfmsg->nfgen_family;
- if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
- filter = ctnetlink_alloc_filter(cda);
+ if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
+ filter = ctnetlink_alloc_filter(cda, family);
if (IS_ERR(filter))
return PTR_ERR(filter);
}
@@ -866,13 +874,24 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
struct ctnetlink_filter *filter = data;
if (filter == NULL)
- return 1;
+ goto out;
+
+ /* Match entries of a given L3 protocol number.
+ * If it is not specified, ie. l3proto == 0,
+ * then match everything.
+ */
+ if (filter->family && nf_ct_l3num(ct) != filter->family)
+ goto ignore_entry;
#ifdef CONFIG_NF_CONNTRACK_MARK
- if ((ct->mark & filter->mark.mask) == filter->mark.val)
- return 1;
+ if ((ct->mark & filter->mark.mask) != filter->mark.val)
+ goto ignore_entry;
#endif
+out:
+ return 1;
+
+ignore_entry:
return 0;
}
@@ -883,8 +902,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- u_int8_t l3proto = nfmsg->nfgen_family;
struct nf_conn *nf_ct_evict[8];
int res, i;
spinlock_t *lockp;
@@ -923,11 +940,6 @@ restart:
if (!net_eq(net, nf_ct_net(ct)))
continue;
- /* Dump entries of a given L3 protocol number.
- * If it is not specified, ie. l3proto == 0,
- * then dump everything. */
- if (l3proto && nf_ct_l3num(ct) != l3proto)
- continue;
if (cb->args[1]) {
if (ct != last)
continue;
@@ -1048,7 +1060,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
rcu_read_lock();
- l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
+ l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
if (likely(l4proto->nlattr_to_tuple)) {
ret = nla_validate_nested(attr, CTA_PROTO_MAX,
@@ -1213,12 +1225,12 @@ static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
static int ctnetlink_flush_conntrack(struct net *net,
const struct nlattr * const cda[],
- u32 portid, int report)
+ u32 portid, int report, u8 family)
{
struct ctnetlink_filter *filter = NULL;
- if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
- filter = ctnetlink_alloc_filter(cda);
+ if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
+ filter = ctnetlink_alloc_filter(cda, family);
if (IS_ERR(filter))
return PTR_ERR(filter);
}
@@ -1257,7 +1269,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
else {
return ctnetlink_flush_conntrack(net, cda,
NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
+ nlmsg_report(nlh), u3);
}
if (err < 0)
@@ -1696,7 +1708,7 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
return err;
rcu_read_lock();
- l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
if (l4proto->from_nlattr)
err = l4proto->from_nlattr(tb, ct);
rcu_read_unlock();
@@ -2656,8 +2668,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
rcu_read_lock();
ret = ctnetlink_dump_tuples_ip(skb, &m);
if (ret >= 0) {
- l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
- tuple->dst.protonum);
+ l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
}
rcu_read_unlock();
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 51c5d7eec0a3..40643af7137e 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -43,7 +43,7 @@
extern unsigned int nf_conntrack_net_id;
-static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
+static struct nf_conntrack_l4proto __rcu *nf_ct_protos[MAX_NF_CT_PROTO + 1] __read_mostly;
static DEFINE_MUTEX(nf_ct_proto_mutex);
@@ -124,23 +124,21 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
#endif
-const struct nf_conntrack_l4proto *
-__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
+const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto)
{
- if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
+ if (unlikely(l4proto >= ARRAY_SIZE(nf_ct_protos)))
return &nf_conntrack_l4proto_generic;
- return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
+ return rcu_dereference(nf_ct_protos[l4proto]);
}
EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
-const struct nf_conntrack_l4proto *
-nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4num)
{
const struct nf_conntrack_l4proto *p;
rcu_read_lock();
- p = __nf_ct_l4proto_find(l3num, l4num);
+ p = __nf_ct_l4proto_find(l4num);
if (!try_module_get(p->me))
p = &nf_conntrack_l4proto_generic;
rcu_read_unlock();
@@ -159,8 +157,7 @@ static int kill_l4proto(struct nf_conn *i, void *data)
{
const struct nf_conntrack_l4proto *l4proto;
l4proto = data;
- return nf_ct_protonum(i) == l4proto->l4proto &&
- nf_ct_l3num(i) == l4proto->l3proto;
+ return nf_ct_protonum(i) == l4proto->l4proto;
}
static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
@@ -219,48 +216,20 @@ int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
- if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos))
- return -EBUSY;
-
if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) ||
(l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
return -EINVAL;
mutex_lock(&nf_ct_proto_mutex);
- if (!nf_ct_protos[l4proto->l3proto]) {
- /* l3proto may be loaded latter. */
- struct nf_conntrack_l4proto __rcu **proto_array;
- int i;
-
- proto_array =
- kmalloc_array(MAX_NF_CT_PROTO,
- sizeof(struct nf_conntrack_l4proto *),
- GFP_KERNEL);
- if (proto_array == NULL) {
- ret = -ENOMEM;
- goto out_unlock;
- }
-
- for (i = 0; i < MAX_NF_CT_PROTO; i++)
- RCU_INIT_POINTER(proto_array[i],
- &nf_conntrack_l4proto_generic);
-
- /* Before making proto_array visible to lockless readers,
- * we must make sure its content is committed to memory.
- */
- smp_wmb();
-
- nf_ct_protos[l4proto->l3proto] = proto_array;
- } else if (rcu_dereference_protected(
- nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+ if (rcu_dereference_protected(
+ nf_ct_protos[l4proto->l4proto],
lockdep_is_held(&nf_ct_proto_mutex)
) != &nf_conntrack_l4proto_generic) {
ret = -EBUSY;
goto out_unlock;
}
- rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
- l4proto);
+ rcu_assign_pointer(nf_ct_protos[l4proto->l4proto], l4proto);
out_unlock:
mutex_unlock(&nf_ct_proto_mutex);
return ret;
@@ -274,7 +243,7 @@ int nf_ct_l4proto_pernet_register_one(struct net *net,
struct nf_proto_net *pn = NULL;
if (l4proto->init_net) {
- ret = l4proto->init_net(net, l4proto->l3proto);
+ ret = l4proto->init_net(net);
if (ret < 0)
goto out;
}
@@ -296,13 +265,13 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
- BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
+ BUG_ON(l4proto->l4proto >= ARRAY_SIZE(nf_ct_protos));
BUG_ON(rcu_dereference_protected(
- nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+ nf_ct_protos[l4proto->l4proto],
lockdep_is_held(&nf_ct_proto_mutex)
) != l4proto);
- rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+ rcu_assign_pointer(nf_ct_protos[l4proto->l4proto],
&nf_conntrack_l4proto_generic);
}
@@ -352,7 +321,7 @@ static int
nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
unsigned int num_proto)
{
- int ret = -EINVAL, ver;
+ int ret = -EINVAL;
unsigned int i;
for (i = 0; i < num_proto; i++) {
@@ -361,9 +330,8 @@ nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
break;
}
if (i != num_proto) {
- ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
- pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
- ver, l4proto[i]->l4proto);
+ pr_err("nf_conntrack: can't register l4 %d proto.\n",
+ l4proto[i]->l4proto);
nf_ct_l4proto_unregister(l4proto, i);
}
return ret;
@@ -382,9 +350,8 @@ int nf_ct_l4proto_pernet_register(struct net *net,
break;
}
if (i != num_proto) {
- pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
- l4proto[i]->l4proto,
- l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
+ pr_err("nf_conntrack %d: pernet registration failed\n",
+ l4proto[i]->l4proto);
nf_ct_l4proto_pernet_unregister(net, l4proto, i);
}
return ret;
@@ -455,7 +422,7 @@ static unsigned int ipv4_conntrack_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+ return nf_conntrack_in(skb, state);
}
static unsigned int ipv4_conntrack_local(void *priv,
@@ -477,7 +444,7 @@ static unsigned int ipv4_conntrack_local(void *priv,
return NF_ACCEPT;
}
- return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+ return nf_conntrack_in(skb, state);
}
/* Connection tracking may drop packets, but never alters them, so
@@ -690,14 +657,14 @@ static unsigned int ipv6_conntrack_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+ return nf_conntrack_in(skb, state);
}
static unsigned int ipv6_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+ return nf_conntrack_in(skb, state);
}
static unsigned int ipv6_helper(void *priv,
@@ -911,37 +878,26 @@ void nf_ct_netns_put(struct net *net, uint8_t nfproto)
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
- &nf_conntrack_l4proto_tcp4,
- &nf_conntrack_l4proto_udp4,
+ &nf_conntrack_l4proto_tcp,
+ &nf_conntrack_l4proto_udp,
&nf_conntrack_l4proto_icmp,
#ifdef CONFIG_NF_CT_PROTO_DCCP
- &nf_conntrack_l4proto_dccp4,
+ &nf_conntrack_l4proto_dccp,
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
- &nf_conntrack_l4proto_sctp4,
+ &nf_conntrack_l4proto_sctp,
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
- &nf_conntrack_l4proto_udplite4,
+ &nf_conntrack_l4proto_udplite,
#endif
#if IS_ENABLED(CONFIG_IPV6)
- &nf_conntrack_l4proto_tcp6,
- &nf_conntrack_l4proto_udp6,
&nf_conntrack_l4proto_icmpv6,
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- &nf_conntrack_l4proto_dccp6,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
- &nf_conntrack_l4proto_sctp6,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
- &nf_conntrack_l4proto_udplite6,
-#endif
#endif /* CONFIG_IPV6 */
};
int nf_conntrack_proto_init(void)
{
- int ret = 0;
+ int ret = 0, i;
ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0)
@@ -952,6 +908,11 @@ int nf_conntrack_proto_init(void)
if (ret < 0)
goto cleanup_sockopt;
#endif
+
+ for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
+ RCU_INIT_POINTER(nf_ct_protos[i],
+ &nf_conntrack_l4proto_generic);
+
ret = nf_ct_l4proto_register(builtin_l4proto,
ARRAY_SIZE(builtin_l4proto));
if (ret < 0)
@@ -969,17 +930,10 @@ cleanup_sockopt:
void nf_conntrack_proto_fini(void)
{
- unsigned int i;
-
nf_unregister_sockopt(&so_getorigdst);
#if IS_ENABLED(CONFIG_IPV6)
nf_unregister_sockopt(&so_getorigdst6);
#endif
- /* No need to call nf_ct_l4proto_unregister(), the register
- * tables are free'd here anyway.
- */
- for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
- kfree(nf_ct_protos[i]);
}
int nf_conntrack_proto_pernet_init(struct net *net)
@@ -988,8 +942,7 @@ int nf_conntrack_proto_pernet_init(struct net *net)
struct nf_proto_net *pn = nf_ct_l4proto_net(net,
&nf_conntrack_l4proto_generic);
- err = nf_conntrack_l4proto_generic.init_net(net,
- nf_conntrack_l4proto_generic.l3proto);
+ err = nf_conntrack_l4proto_generic.init_net(net);
if (err < 0)
return err;
err = nf_ct_l4proto_register_sysctl(net,
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index f3f91ed2c21a..171e9e122e5f 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -389,18 +389,15 @@ static inline struct nf_dccp_net *dccp_pernet(struct net *net)
return &net->ct.nf_ct_proto.dccp;
}
-static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff)
+static noinline bool
+dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ const struct dccp_hdr *dh)
{
struct net *net = nf_ct_net(ct);
struct nf_dccp_net *dn;
- struct dccp_hdr _dh, *dh;
const char *msg;
u_int8_t state;
- dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
- BUG_ON(dh == NULL);
-
state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
switch (state) {
default:
@@ -438,8 +435,51 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh)
ntohl(dhack->dccph_ack_nr_low);
}
-static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff, enum ip_conntrack_info ctinfo)
+static bool dccp_error(const struct dccp_hdr *dh,
+ struct sk_buff *skb, unsigned int dataoff,
+ const struct nf_hook_state *state)
+{
+ unsigned int dccp_len = skb->len - dataoff;
+ unsigned int cscov;
+ const char *msg;
+
+ if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
+ dh->dccph_doff * 4 > dccp_len) {
+ msg = "nf_ct_dccp: truncated/malformed packet ";
+ goto out_invalid;
+ }
+
+ cscov = dccp_len;
+ if (dh->dccph_cscov) {
+ cscov = (dh->dccph_cscov - 1) * 4;
+ if (cscov > dccp_len) {
+ msg = "nf_ct_dccp: bad checksum coverage ";
+ goto out_invalid;
+ }
+ }
+
+ if (state->hook == NF_INET_PRE_ROUTING &&
+ state->net->ct.sysctl_checksum &&
+ nf_checksum_partial(skb, state->hook, dataoff, cscov,
+ IPPROTO_DCCP, state->pf)) {
+ msg = "nf_ct_dccp: bad checksum ";
+ goto out_invalid;
+ }
+
+ if (dh->dccph_type >= DCCP_PKT_INVALID) {
+ msg = "nf_ct_dccp: reserved packet type ";
+ goto out_invalid;
+ }
+ return false;
+out_invalid:
+ nf_l4proto_log_invalid(skb, state->net, state->pf,
+ IPPROTO_DCCP, "%s", msg);
+ return true;
+}
+
+static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
+ unsigned int dataoff, enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
struct dccp_hdr _dh, *dh;
@@ -448,8 +488,15 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int *timeouts;
dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
- BUG_ON(dh == NULL);
+ if (!dh)
+ return NF_DROP;
+
+ if (dccp_error(dh, skb, dataoff, state))
+ return -NF_ACCEPT;
+
type = dh->dccph_type;
+ if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh))
+ return -NF_ACCEPT;
if (type == DCCP_PKT_RESET &&
!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
@@ -527,55 +574,6 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
return NF_ACCEPT;
}
-static int dccp_error(struct net *net, struct nf_conn *tmpl,
- struct sk_buff *skb, unsigned int dataoff,
- u_int8_t pf, unsigned int hooknum)
-{
- struct dccp_hdr _dh, *dh;
- unsigned int dccp_len = skb->len - dataoff;
- unsigned int cscov;
- const char *msg;
-
- dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
- if (dh == NULL) {
- msg = "nf_ct_dccp: short packet ";
- goto out_invalid;
- }
-
- if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
- dh->dccph_doff * 4 > dccp_len) {
- msg = "nf_ct_dccp: truncated/malformed packet ";
- goto out_invalid;
- }
-
- cscov = dccp_len;
- if (dh->dccph_cscov) {
- cscov = (dh->dccph_cscov - 1) * 4;
- if (cscov > dccp_len) {
- msg = "nf_ct_dccp: bad checksum coverage ";
- goto out_invalid;
- }
- }
-
- if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
- nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP,
- pf)) {
- msg = "nf_ct_dccp: bad checksum ";
- goto out_invalid;
- }
-
- if (dh->dccph_type >= DCCP_PKT_INVALID) {
- msg = "nf_ct_dccp: reserved packet type ";
- goto out_invalid;
- }
-
- return NF_ACCEPT;
-
-out_invalid:
- nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg);
- return -NF_ACCEPT;
-}
-
static bool dccp_can_early_drop(const struct nf_conn *ct)
{
switch (ct->proto.dccp.state) {
@@ -814,7 +812,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
return 0;
}
-static int dccp_init_net(struct net *net, u_int16_t proto)
+static int dccp_init_net(struct net *net)
{
struct nf_dccp_net *dn = dccp_pernet(net);
struct nf_proto_net *pn = &dn->pn;
@@ -844,45 +842,9 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.dccp.pn;
}
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
- .l3proto = AF_INET,
- .l4proto = IPPROTO_DCCP,
- .new = dccp_new,
- .packet = dccp_packet,
- .error = dccp_error,
- .can_early_drop = dccp_can_early_drop,
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
- .print_conntrack = dccp_print_conntrack,
-#endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_size = DCCP_NLATTR_SIZE,
- .to_nlattr = dccp_to_nlattr,
- .from_nlattr = nlattr_to_dccp,
- .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
- .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
- .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
- .nla_policy = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- .ctnl_timeout = {
- .nlattr_to_obj = dccp_timeout_nlattr_to_obj,
- .obj_to_nlattr = dccp_timeout_obj_to_nlattr,
- .nlattr_max = CTA_TIMEOUT_DCCP_MAX,
- .obj_size = sizeof(unsigned int) * CT_DCCP_MAX,
- .nla_policy = dccp_timeout_nla_policy,
- },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = dccp_init_net,
- .get_net_proto = dccp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
- .l3proto = AF_INET6,
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
.l4proto = IPPROTO_DCCP,
- .new = dccp_new,
.packet = dccp_packet,
- .error = dccp_error,
.can_early_drop = dccp_can_early_drop,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
@@ -908,4 +870,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
.init_net = dccp_init_net,
.get_net_proto = dccp_get_net_proto,
};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6);
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 1df3244ecd07..e10e867e0b55 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -44,12 +44,19 @@ static bool generic_pkt_to_tuple(const struct sk_buff *skb,
/* Returns verdict for packet, or -1 for invalid. */
static int generic_packet(struct nf_conn *ct,
- const struct sk_buff *skb,
+ struct sk_buff *skb,
unsigned int dataoff,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
const unsigned int *timeout = nf_ct_timeout_lookup(ct);
+ if (!nf_generic_should_process(nf_ct_protonum(ct))) {
+ pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
+ nf_ct_protonum(ct));
+ return -NF_ACCEPT;
+ }
+
if (!timeout)
timeout = &generic_pernet(nf_ct_net(ct))->timeout;
@@ -57,19 +64,6 @@ static int generic_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
-/* Called when a new connection for this protocol found. */
-static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff)
-{
- bool ret;
-
- ret = nf_generic_should_process(nf_ct_protonum(ct));
- if (!ret)
- pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
- nf_ct_protonum(ct));
- return ret;
-}
-
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
#include <linux/netfilter/nfnetlink.h>
@@ -142,7 +136,7 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int generic_init_net(struct net *net, u_int16_t proto)
+static int generic_init_net(struct net *net)
{
struct nf_generic_net *gn = generic_pernet(net);
struct nf_proto_net *pn = &gn->pn;
@@ -159,11 +153,9 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
{
- .l3proto = PF_UNSPEC,
.l4proto = 255,
.pkt_to_tuple = generic_pkt_to_tuple,
.packet = generic_packet,
- .new = generic_new,
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
.ctnl_timeout = {
.nlattr_to_obj = generic_timeout_nlattr_to_obj,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 650eb4fba2c5..9b48dc8b4b88 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -233,10 +233,26 @@ static unsigned int *gre_get_timeouts(struct net *net)
/* Returns verdict for packet, and may modify conntrack */
static int gre_packet(struct nf_conn *ct,
- const struct sk_buff *skb,
+ struct sk_buff *skb,
unsigned int dataoff,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
+ if (state->pf != NFPROTO_IPV4)
+ return -NF_ACCEPT;
+
+ if (!nf_ct_is_confirmed(ct)) {
+ unsigned int *timeouts = nf_ct_timeout_lookup(ct);
+
+ if (!timeouts)
+ timeouts = gre_get_timeouts(nf_ct_net(ct));
+
+ /* initialize to sane value. Ideally a conntrack helper
+ * (e.g. in case of pptp) is increasing them */
+ ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED];
+ ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
+ }
+
/* If we've seen traffic both ways, this is a GRE connection.
* Extend timeout. */
if (ct->status & IPS_SEEN_REPLY) {
@@ -252,26 +268,6 @@ static int gre_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
-/* Called when a new connection for this protocol found. */
-static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff)
-{
- unsigned int *timeouts = nf_ct_timeout_lookup(ct);
-
- if (!timeouts)
- timeouts = gre_get_timeouts(nf_ct_net(ct));
-
- pr_debug(": ");
- nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-
- /* initialize to sane value. Ideally a conntrack helper
- * (e.g. in case of pptp) is increasing them */
- ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED];
- ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
-
- return true;
-}
-
/* Called when a conntrack entry has already been removed from the hashes
* and is about to be deleted from memory */
static void gre_destroy(struct nf_conn *ct)
@@ -336,7 +332,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-static int gre_init_net(struct net *net, u_int16_t proto)
+static int gre_init_net(struct net *net)
{
struct netns_proto_gre *net_gre = gre_pernet(net);
int i;
@@ -351,14 +347,12 @@ static int gre_init_net(struct net *net, u_int16_t proto)
/* protocol helper struct */
static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
- .l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
.pkt_to_tuple = gre_pkt_to_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
#endif
.packet = gre_packet,
- .new = gre_new,
.destroy = gre_destroy,
.me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 43c7e1a217b9..3598520bd19b 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -72,34 +72,17 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static unsigned int *icmp_get_timeouts(struct net *net)
-{
- return &icmp_pernet(net)->timeout;
-}
-
/* Returns verdict for packet, or -1 for invalid. */
static int icmp_packet(struct nf_conn *ct,
- const struct sk_buff *skb,
+ struct sk_buff *skb,
unsigned int dataoff,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
/* Do not immediately delete the connection after the first
successful reply to avoid excessive conntrackd traffic
and also to handle correctly ICMP echo reply duplicates. */
unsigned int *timeout = nf_ct_timeout_lookup(ct);
-
- if (!timeout)
- timeout = icmp_get_timeouts(nf_ct_net(ct));
-
- nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff)
-{
static const u_int8_t valid_new[] = {
[ICMP_ECHO] = 1,
[ICMP_TIMESTAMP] = 1,
@@ -107,21 +90,29 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
[ICMP_ADDRESS] = 1
};
+ if (state->pf != NFPROTO_IPV4)
+ return -NF_ACCEPT;
+
if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
!valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
/* Can't create a new ICMP `conn' with this. */
pr_debug("icmp: can't create new conn with type %u\n",
ct->tuplehash[0].tuple.dst.u.icmp.type);
nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
- return false;
+ return -NF_ACCEPT;
}
- return true;
+
+ if (!timeout)
+ timeout = &icmp_pernet(nf_ct_net(ct))->timeout;
+
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+ return NF_ACCEPT;
}
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
static int
-icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
- unsigned int hooknum)
+icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
struct nf_conntrack_tuple innertuple, origtuple;
const struct nf_conntrack_l4proto *innerproto;
@@ -137,13 +128,13 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
if (!nf_ct_get_tuplepr(skb,
skb_network_offset(skb) + ip_hdrlen(skb)
+ sizeof(struct icmphdr),
- PF_INET, net, &origtuple)) {
+ PF_INET, state->net, &origtuple)) {
pr_debug("icmp_error_message: failed to get tuple\n");
return -NF_ACCEPT;
}
/* rcu_read_lock()ed by nf_hook_thresh */
- innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
+ innerproto = __nf_ct_l4proto_find(origtuple.dst.protonum);
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
@@ -154,7 +145,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
ctinfo = IP_CT_RELATED;
- h = nf_conntrack_find_get(net, zone, &innertuple);
+ h = nf_conntrack_find_get(state->net, zone, &innertuple);
if (!h) {
pr_debug("icmp_error_message: no match\n");
return -NF_ACCEPT;
@@ -168,17 +159,18 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
return NF_ACCEPT;
}
-static void icmp_error_log(const struct sk_buff *skb, struct net *net,
- u8 pf, const char *msg)
+static void icmp_error_log(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ const char *msg)
{
- nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state->net, state->pf,
+ IPPROTO_ICMP, "%s", msg);
}
/* Small and modified version of icmp_rcv */
-static int
-icmp_error(struct net *net, struct nf_conn *tmpl,
- struct sk_buff *skb, unsigned int dataoff,
- u8 pf, unsigned int hooknum)
+int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
+ struct sk_buff *skb, unsigned int dataoff,
+ const struct nf_hook_state *state)
{
const struct icmphdr *icmph;
struct icmphdr _ih;
@@ -186,14 +178,15 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
/* Not enough header? */
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
- icmp_error_log(skb, net, pf, "short packet");
+ icmp_error_log(skb, state, "short packet");
return -NF_ACCEPT;
}
/* See ip_conntrack_proto_tcp.c */
- if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
- nf_ip_checksum(skb, hooknum, dataoff, 0)) {
- icmp_error_log(skb, net, pf, "bad hw icmp checksum");
+ if (state->net->ct.sysctl_checksum &&
+ state->hook == NF_INET_PRE_ROUTING &&
+ nf_ip_checksum(skb, state->hook, dataoff, 0)) {
+ icmp_error_log(skb, state, "bad hw icmp checksum");
return -NF_ACCEPT;
}
@@ -204,7 +197,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
* discarded.
*/
if (icmph->type > NR_ICMP_TYPES) {
- icmp_error_log(skb, net, pf, "invalid icmp type");
+ icmp_error_log(skb, state, "invalid icmp type");
return -NF_ACCEPT;
}
@@ -216,7 +209,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
icmph->type != ICMP_REDIRECT)
return NF_ACCEPT;
- return icmp_error_message(net, tmpl, skb, hooknum);
+ return icmp_error_message(tmpl, skb, state);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -342,7 +335,7 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int icmp_init_net(struct net *net, u_int16_t proto)
+static int icmp_init_net(struct net *net)
{
struct nf_icmp_net *in = icmp_pernet(net);
struct nf_proto_net *pn = &in->pn;
@@ -359,13 +352,10 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
{
- .l3proto = PF_INET,
.l4proto = IPPROTO_ICMP,
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
.packet = icmp_packet,
- .new = icmp_new,
- .error = icmp_error,
.destroy = NULL,
.me = NULL,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 97e40f77d678..378618feed5d 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -92,11 +92,31 @@ static unsigned int *icmpv6_get_timeouts(struct net *net)
/* Returns verdict for packet, or -1 for invalid. */
static int icmpv6_packet(struct nf_conn *ct,
- const struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo)
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
unsigned int *timeout = nf_ct_timeout_lookup(ct);
+ static const u8 valid_new[] = {
+ [ICMPV6_ECHO_REQUEST - 128] = 1,
+ [ICMPV6_NI_QUERY - 128] = 1
+ };
+
+ if (state->pf != NFPROTO_IPV6)
+ return -NF_ACCEPT;
+
+ if (!nf_ct_is_confirmed(ct)) {
+ int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
+
+ if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
+ /* Can't create a new ICMPv6 `conn' with this. */
+ pr_debug("icmpv6: can't create new conn with type %u\n",
+ type + 128);
+ nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+ return -NF_ACCEPT;
+ }
+ }
if (!timeout)
timeout = icmpv6_get_timeouts(nf_ct_net(ct));
@@ -109,26 +129,6 @@ static int icmpv6_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
-/* Called when a new connection for this protocol found. */
-static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff)
-{
- static const u_int8_t valid_new[] = {
- [ICMPV6_ECHO_REQUEST - 128] = 1,
- [ICMPV6_NI_QUERY - 128] = 1
- };
- int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
-
- if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
- /* Can't create a new ICMPv6 `conn' with this. */
- pr_debug("icmpv6: can't create new conn with type %u\n",
- type + 128);
- nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
- return false;
- }
- return true;
-}
-
static int
icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
@@ -153,7 +153,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
}
/* rcu_read_lock()ed by nf_hook_thresh */
- inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
+ inproto = __nf_ct_l4proto_find(origtuple.dst.protonum);
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
@@ -179,16 +179,18 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
return NF_ACCEPT;
}
-static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
- u8 pf, const char *msg)
+static void icmpv6_error_log(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ const char *msg)
{
- nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
+ nf_l4proto_log_invalid(skb, state->net, state->pf,
+ IPPROTO_ICMPV6, "%s", msg);
}
-static int
-icmpv6_error(struct net *net, struct nf_conn *tmpl,
- struct sk_buff *skb, unsigned int dataoff,
- u8 pf, unsigned int hooknum)
+int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state)
{
const struct icmp6hdr *icmp6h;
struct icmp6hdr _ih;
@@ -196,13 +198,14 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmp6h == NULL) {
- icmpv6_error_log(skb, net, pf, "short packet");
+ icmpv6_error_log(skb, state, "short packet");
return -NF_ACCEPT;
}
- if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
- nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
- icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
+ if (state->hook == NF_INET_PRE_ROUTING &&
+ state->net->ct.sysctl_checksum &&
+ nf_ip6_checksum(skb, state->hook, dataoff, IPPROTO_ICMPV6)) {
+ icmpv6_error_log(skb, state, "ICMPv6 checksum failed");
return -NF_ACCEPT;
}
@@ -217,7 +220,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
if (icmp6h->icmp6_type >= 128)
return NF_ACCEPT;
- return icmpv6_error_message(net, tmpl, skb, dataoff);
+ return icmpv6_error_message(state->net, tmpl, skb, dataoff);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -343,7 +346,7 @@ static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int icmpv6_init_net(struct net *net, u_int16_t proto)
+static int icmpv6_init_net(struct net *net)
{
struct nf_icmp_net *in = icmpv6_pernet(net);
struct nf_proto_net *pn = &in->pn;
@@ -360,13 +363,10 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
{
- .l3proto = PF_INET6,
.l4proto = IPPROTO_ICMPV6,
.pkt_to_tuple = icmpv6_pkt_to_tuple,
.invert_tuple = icmpv6_invert_tuple,
.packet = icmpv6_packet,
- .new = icmpv6_new,
- .error = icmpv6_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = icmpv6_tuple_to_nlattr,
.nlattr_tuple_size = icmpv6_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index e4d738d34cd0..3d719d3eb9a3 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -273,11 +273,100 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
return sctp_conntracks[dir][i][cur_state];
}
+/* Don't need lock here: this conntrack not in circulation yet */
+static noinline bool
+sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ const struct sctphdr *sh, unsigned int dataoff)
+{
+ enum sctp_conntrack new_state;
+ const struct sctp_chunkhdr *sch;
+ struct sctp_chunkhdr _sch;
+ u32 offset, count;
+
+ memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
+ new_state = SCTP_CONNTRACK_MAX;
+ for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) {
+ new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
+ SCTP_CONNTRACK_NONE, sch->type);
+
+ /* Invalid: delete conntrack */
+ if (new_state == SCTP_CONNTRACK_NONE ||
+ new_state == SCTP_CONNTRACK_MAX) {
+ pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
+ return false;
+ }
+
+ /* Copy the vtag into the state info */
+ if (sch->type == SCTP_CID_INIT) {
+ struct sctp_inithdr _inithdr, *ih;
+ /* Sec 8.5.1 (A) */
+ if (sh->vtag)
+ return false;
+
+ ih = skb_header_pointer(skb, offset + sizeof(_sch),
+ sizeof(_inithdr), &_inithdr);
+ if (!ih)
+ return false;
+
+ pr_debug("Setting vtag %x for new conn\n",
+ ih->init_tag);
+
+ ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
+ } else if (sch->type == SCTP_CID_HEARTBEAT) {
+ pr_debug("Setting vtag %x for secondary conntrack\n",
+ sh->vtag);
+ ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
+ } else {
+ /* If it is a shutdown ack OOTB packet, we expect a return
+ shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
+ pr_debug("Setting vtag %x for new conn OOTB\n",
+ sh->vtag);
+ ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
+ }
+
+ ct->proto.sctp.state = new_state;
+ }
+
+ return true;
+}
+
+static bool sctp_error(struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state)
+{
+ const struct sctphdr *sh;
+ const char *logmsg;
+
+ if (skb->len < dataoff + sizeof(struct sctphdr)) {
+ logmsg = "nf_ct_sctp: short packet ";
+ goto out_invalid;
+ }
+ if (state->hook == NF_INET_PRE_ROUTING &&
+ state->net->ct.sysctl_checksum &&
+ skb->ip_summed == CHECKSUM_NONE) {
+ if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
+ logmsg = "nf_ct_sctp: failed to read header ";
+ goto out_invalid;
+ }
+ sh = (const struct sctphdr *)(skb->data + dataoff);
+ if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
+ logmsg = "nf_ct_sctp: bad CRC ";
+ goto out_invalid;
+ }
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ return false;
+out_invalid:
+ nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_SCTP, "%s", logmsg);
+ return true;
+}
+
/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
static int sctp_packet(struct nf_conn *ct,
- const struct sk_buff *skb,
+ struct sk_buff *skb,
unsigned int dataoff,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
enum sctp_conntrack new_state, old_state;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -289,6 +378,9 @@ static int sctp_packet(struct nf_conn *ct,
unsigned int *timeouts;
unsigned long map[256 / sizeof(unsigned long)] = { 0 };
+ if (sctp_error(skb, dataoff, state))
+ return -NF_ACCEPT;
+
sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
if (sh == NULL)
goto out;
@@ -296,6 +388,17 @@ static int sctp_packet(struct nf_conn *ct,
if (do_basic_checks(ct, skb, dataoff, map) != 0)
goto out;
+ if (!nf_ct_is_confirmed(ct)) {
+ /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
+ if (test_bit(SCTP_CID_ABORT, map) ||
+ test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
+ test_bit(SCTP_CID_COOKIE_ACK, map))
+ return -NF_ACCEPT;
+
+ if (!sctp_new(ct, skb, sh, dataoff))
+ return -NF_ACCEPT;
+ }
+
/* Check the verification tag (Sec 8.5) */
if (!test_bit(SCTP_CID_INIT, map) &&
!test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
@@ -397,110 +500,6 @@ out:
return -NF_ACCEPT;
}
-/* Called when a new connection for this protocol found. */
-static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff)
-{
- enum sctp_conntrack new_state;
- const struct sctphdr *sh;
- struct sctphdr _sctph;
- const struct sctp_chunkhdr *sch;
- struct sctp_chunkhdr _sch;
- u_int32_t offset, count;
- unsigned long map[256 / sizeof(unsigned long)] = { 0 };
-
- sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
- if (sh == NULL)
- return false;
-
- if (do_basic_checks(ct, skb, dataoff, map) != 0)
- return false;
-
- /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
- if (test_bit(SCTP_CID_ABORT, map) ||
- test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
- test_bit(SCTP_CID_COOKIE_ACK, map))
- return false;
-
- memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
- new_state = SCTP_CONNTRACK_MAX;
- for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
- /* Don't need lock here: this conntrack not in circulation yet */
- new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
- SCTP_CONNTRACK_NONE, sch->type);
-
- /* Invalid: delete conntrack */
- if (new_state == SCTP_CONNTRACK_NONE ||
- new_state == SCTP_CONNTRACK_MAX) {
- pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
- return false;
- }
-
- /* Copy the vtag into the state info */
- if (sch->type == SCTP_CID_INIT) {
- struct sctp_inithdr _inithdr, *ih;
- /* Sec 8.5.1 (A) */
- if (sh->vtag)
- return false;
-
- ih = skb_header_pointer(skb, offset + sizeof(_sch),
- sizeof(_inithdr), &_inithdr);
- if (!ih)
- return false;
-
- pr_debug("Setting vtag %x for new conn\n",
- ih->init_tag);
-
- ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
- } else if (sch->type == SCTP_CID_HEARTBEAT) {
- pr_debug("Setting vtag %x for secondary conntrack\n",
- sh->vtag);
- ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
- }
- /* If it is a shutdown ack OOTB packet, we expect a return
- shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
- else {
- pr_debug("Setting vtag %x for new conn OOTB\n",
- sh->vtag);
- ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
- }
-
- ct->proto.sctp.state = new_state;
- }
-
- return true;
-}
-
-static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
- unsigned int dataoff,
- u8 pf, unsigned int hooknum)
-{
- const struct sctphdr *sh;
- const char *logmsg;
-
- if (skb->len < dataoff + sizeof(struct sctphdr)) {
- logmsg = "nf_ct_sctp: short packet ";
- goto out_invalid;
- }
- if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
- skb->ip_summed == CHECKSUM_NONE) {
- if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
- logmsg = "nf_ct_sctp: failed to read header ";
- goto out_invalid;
- }
- sh = (const struct sctphdr *)(skb->data + dataoff);
- if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
- logmsg = "nf_ct_sctp: bad CRC ";
- goto out_invalid;
- }
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- }
- return NF_ACCEPT;
-out_invalid:
- nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg);
- return -NF_ACCEPT;
-}
-
static bool sctp_can_early_drop(const struct nf_conn *ct)
{
switch (ct->proto.sctp.state) {
@@ -735,7 +734,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int sctp_init_net(struct net *net, u_int16_t proto)
+static int sctp_init_net(struct net *net)
{
struct nf_sctp_net *sn = sctp_pernet(net);
struct nf_proto_net *pn = &sn->pn;
@@ -760,49 +759,12 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.sctp.pn;
}
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
- .l3proto = PF_INET,
- .l4proto = IPPROTO_SCTP,
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
- .print_conntrack = sctp_print_conntrack,
-#endif
- .packet = sctp_packet,
- .new = sctp_new,
- .error = sctp_error,
- .can_early_drop = sctp_can_early_drop,
- .me = THIS_MODULE,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_size = SCTP_NLATTR_SIZE,
- .to_nlattr = sctp_to_nlattr,
- .from_nlattr = nlattr_to_sctp,
- .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
- .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
- .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
- .nla_policy = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- .ctnl_timeout = {
- .nlattr_to_obj = sctp_timeout_nlattr_to_obj,
- .obj_to_nlattr = sctp_timeout_obj_to_nlattr,
- .nlattr_max = CTA_TIMEOUT_SCTP_MAX,
- .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX,
- .nla_policy = sctp_timeout_nla_policy,
- },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = sctp_init_net,
- .get_net_proto = sctp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
- .l3proto = PF_INET6,
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = {
.l4proto = IPPROTO_SCTP,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
#endif
.packet = sctp_packet,
- .new = sctp_new,
- .error = sctp_error,
.can_early_drop = sctp_can_early_drop,
.me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -826,4 +788,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
.init_net = sctp_init_net,
.get_net_proto = sctp_get_net_proto,
};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp6);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 247b89784a6f..1bcf9984d45e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -717,35 +717,26 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
[TCPHDR_ACK|TCPHDR_URG] = 1,
};
-static void tcp_error_log(const struct sk_buff *skb, struct net *net,
- u8 pf, const char *msg)
+static void tcp_error_log(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ const char *msg)
{
- nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_TCP, "%s", msg);
}
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
-static int tcp_error(struct net *net, struct nf_conn *tmpl,
- struct sk_buff *skb,
- unsigned int dataoff,
- u_int8_t pf,
- unsigned int hooknum)
+static bool tcp_error(const struct tcphdr *th,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state)
{
- const struct tcphdr *th;
- struct tcphdr _tcph;
unsigned int tcplen = skb->len - dataoff;
- u_int8_t tcpflags;
-
- /* Smaller that minimal TCP header? */
- th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
- if (th == NULL) {
- tcp_error_log(skb, net, pf, "short packet");
- return -NF_ACCEPT;
- }
+ u8 tcpflags;
/* Not whole TCP header or malformed packet */
if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
- tcp_error_log(skb, net, pf, "truncated packet");
- return -NF_ACCEPT;
+ tcp_error_log(skb, state, "truncated packet");
+ return true;
}
/* Checksum invalid? Ignore.
@@ -753,27 +744,101 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
* because the checksum is assumed to be correct.
*/
/* FIXME: Source route IP option packets --RR */
- if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
- nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
- tcp_error_log(skb, net, pf, "bad checksum");
- return -NF_ACCEPT;
+ if (state->net->ct.sysctl_checksum &&
+ state->hook == NF_INET_PRE_ROUTING &&
+ nf_checksum(skb, state->hook, dataoff, IPPROTO_TCP, state->pf)) {
+ tcp_error_log(skb, state, "bad checksum");
+ return true;
}
/* Check TCP flags. */
tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
if (!tcp_valid_flags[tcpflags]) {
- tcp_error_log(skb, net, pf, "invalid tcp flag combination");
- return -NF_ACCEPT;
+ tcp_error_log(skb, state, "invalid tcp flag combination");
+ return true;
}
- return NF_ACCEPT;
+ return false;
+}
+
+static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct tcphdr *th)
+{
+ enum tcp_conntrack new_state;
+ struct net *net = nf_ct_net(ct);
+ const struct nf_tcp_net *tn = tcp_pernet(net);
+ const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
+ const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
+
+ /* Don't need lock here: this conntrack not in circulation yet */
+ new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
+
+ /* Invalid: delete conntrack */
+ if (new_state >= TCP_CONNTRACK_MAX) {
+ pr_debug("nf_ct_tcp: invalid new deleting.\n");
+ return false;
+ }
+
+ if (new_state == TCP_CONNTRACK_SYN_SENT) {
+ memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
+ /* SYN packet */
+ ct->proto.tcp.seen[0].td_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len,
+ dataoff, th);
+ ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+ if (ct->proto.tcp.seen[0].td_maxwin == 0)
+ ct->proto.tcp.seen[0].td_maxwin = 1;
+ ct->proto.tcp.seen[0].td_maxend =
+ ct->proto.tcp.seen[0].td_end;
+
+ tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
+ } else if (tn->tcp_loose == 0) {
+ /* Don't try to pick up connections. */
+ return false;
+ } else {
+ memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
+ /*
+ * We are in the middle of a connection,
+ * its history is lost for us.
+ * Let's try to use the data from the packet.
+ */
+ ct->proto.tcp.seen[0].td_end =
+ segment_seq_plus_len(ntohl(th->seq), skb->len,
+ dataoff, th);
+ ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
+ if (ct->proto.tcp.seen[0].td_maxwin == 0)
+ ct->proto.tcp.seen[0].td_maxwin = 1;
+ ct->proto.tcp.seen[0].td_maxend =
+ ct->proto.tcp.seen[0].td_end +
+ ct->proto.tcp.seen[0].td_maxwin;
+
+ /* We assume SACK and liberal window checking to handle
+ * window scaling */
+ ct->proto.tcp.seen[0].flags =
+ ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
+ IP_CT_TCP_FLAG_BE_LIBERAL;
+ }
+
+ /* tcp_packet will set them */
+ ct->proto.tcp.last_index = TCP_NONE_SET;
+
+ pr_debug("%s: sender end=%u maxend=%u maxwin=%u scale=%i "
+ "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+ __func__,
+ sender->td_end, sender->td_maxend, sender->td_maxwin,
+ sender->td_scale,
+ receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+ receiver->td_scale);
+ return true;
}
/* Returns verdict for packet, or -1 for invalid. */
static int tcp_packet(struct nf_conn *ct,
- const struct sk_buff *skb,
+ struct sk_buff *skb,
unsigned int dataoff,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = tcp_pernet(net);
@@ -786,7 +851,14 @@ static int tcp_packet(struct nf_conn *ct,
unsigned long timeout;
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
- BUG_ON(th == NULL);
+ if (th == NULL)
+ return -NF_ACCEPT;
+
+ if (tcp_error(th, skb, dataoff, state))
+ return -NF_ACCEPT;
+
+ if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th))
+ return -NF_ACCEPT;
spin_lock_bh(&ct->lock);
old_state = ct->proto.tcp.state;
@@ -1067,82 +1139,6 @@ static int tcp_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
-/* Called when a new connection for this protocol found. */
-static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff)
-{
- enum tcp_conntrack new_state;
- const struct tcphdr *th;
- struct tcphdr _tcph;
- struct net *net = nf_ct_net(ct);
- struct nf_tcp_net *tn = tcp_pernet(net);
- const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
- const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
-
- th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
- BUG_ON(th == NULL);
-
- /* Don't need lock here: this conntrack not in circulation yet */
- new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
-
- /* Invalid: delete conntrack */
- if (new_state >= TCP_CONNTRACK_MAX) {
- pr_debug("nf_ct_tcp: invalid new deleting.\n");
- return false;
- }
-
- if (new_state == TCP_CONNTRACK_SYN_SENT) {
- memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
- /* SYN packet */
- ct->proto.tcp.seen[0].td_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len,
- dataoff, th);
- ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
- if (ct->proto.tcp.seen[0].td_maxwin == 0)
- ct->proto.tcp.seen[0].td_maxwin = 1;
- ct->proto.tcp.seen[0].td_maxend =
- ct->proto.tcp.seen[0].td_end;
-
- tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
- } else if (tn->tcp_loose == 0) {
- /* Don't try to pick up connections. */
- return false;
- } else {
- memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
- /*
- * We are in the middle of a connection,
- * its history is lost for us.
- * Let's try to use the data from the packet.
- */
- ct->proto.tcp.seen[0].td_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len,
- dataoff, th);
- ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
- if (ct->proto.tcp.seen[0].td_maxwin == 0)
- ct->proto.tcp.seen[0].td_maxwin = 1;
- ct->proto.tcp.seen[0].td_maxend =
- ct->proto.tcp.seen[0].td_end +
- ct->proto.tcp.seen[0].td_maxwin;
-
- /* We assume SACK and liberal window checking to handle
- * window scaling */
- ct->proto.tcp.seen[0].flags =
- ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
- IP_CT_TCP_FLAG_BE_LIBERAL;
- }
-
- /* tcp_packet will set them */
- ct->proto.tcp.last_index = TCP_NONE_SET;
-
- pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
- return true;
-}
-
static bool tcp_can_early_drop(const struct nf_conn *ct)
{
switch (ct->proto.tcp.state) {
@@ -1510,7 +1506,7 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int tcp_init_net(struct net *net, u_int16_t proto)
+static int tcp_init_net(struct net *net)
{
struct nf_tcp_net *tn = tcp_pernet(net);
struct nf_proto_net *pn = &tn->pn;
@@ -1538,16 +1534,13 @@ static struct nf_proto_net *tcp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.tcp.pn;
}
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
{
- .l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
#endif
.packet = tcp_packet,
- .new = tcp_new,
- .error = tcp_error,
.can_early_drop = tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = tcp_to_nlattr,
@@ -1571,39 +1564,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
.init_net = tcp_init_net,
.get_net_proto = tcp_get_net_proto,
};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
-{
- .l3proto = PF_INET6,
- .l4proto = IPPROTO_TCP,
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
- .print_conntrack = tcp_print_conntrack,
-#endif
- .packet = tcp_packet,
- .new = tcp_new,
- .error = tcp_error,
- .can_early_drop = tcp_can_early_drop,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_size = TCP_NLATTR_SIZE,
- .to_nlattr = tcp_to_nlattr,
- .from_nlattr = nlattr_to_tcp,
- .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
- .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
- .nlattr_tuple_size = tcp_nlattr_tuple_size,
- .nla_policy = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- .ctnl_timeout = {
- .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
- .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
- .nlattr_max = CTA_TIMEOUT_TCP_MAX,
- .obj_size = sizeof(unsigned int) *
- TCP_CONNTRACK_TIMEOUT_MAX,
- .nla_policy = tcp_timeout_nla_policy,
- },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = tcp_init_net,
- .get_net_proto = tcp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3065fb8ef91b..a7aa70370913 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -42,14 +42,65 @@ static unsigned int *udp_get_timeouts(struct net *net)
return udp_pernet(net)->timeouts;
}
+static void udp_error_log(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ const char *msg)
+{
+ nf_l4proto_log_invalid(skb, state->net, state->pf,
+ IPPROTO_UDP, "%s", msg);
+}
+
+static bool udp_error(struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state)
+{
+ unsigned int udplen = skb->len - dataoff;
+ const struct udphdr *hdr;
+ struct udphdr _hdr;
+
+ /* Header is too small? */
+ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (!hdr) {
+ udp_error_log(skb, state, "short packet");
+ return true;
+ }
+
+ /* Truncated/malformed packets */
+ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
+ udp_error_log(skb, state, "truncated/malformed packet");
+ return true;
+ }
+
+ /* Packet with no checksum */
+ if (!hdr->check)
+ return false;
+
+ /* Checksum invalid? Ignore.
+ * We skip checking packets on the outgoing path
+ * because the checksum is assumed to be correct.
+ * FIXME: Source route IP option packets --RR */
+ if (state->hook == NF_INET_PRE_ROUTING &&
+ state->net->ct.sysctl_checksum &&
+ nf_checksum(skb, state->hook, dataoff, IPPROTO_UDP, state->pf)) {
+ udp_error_log(skb, state, "bad checksum");
+ return true;
+ }
+
+ return false;
+}
+
/* Returns verdict for packet, and may modify conntracktype */
static int udp_packet(struct nf_conn *ct,
- const struct sk_buff *skb,
+ struct sk_buff *skb,
unsigned int dataoff,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
unsigned int *timeouts;
+ if (udp_error(skb, dataoff, state))
+ return -NF_ACCEPT;
+
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = udp_get_timeouts(nf_ct_net(ct));
@@ -69,24 +120,18 @@ static int udp_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
-/* Called when a new connection for this protocol found. */
-static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff)
-{
- return true;
-}
-
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-static void udplite_error_log(const struct sk_buff *skb, struct net *net,
- u8 pf, const char *msg)
+static void udplite_error_log(const struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ const char *msg)
{
- nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg);
+ nf_l4proto_log_invalid(skb, state->net, state->pf,
+ IPPROTO_UDPLITE, "%s", msg);
}
-static int udplite_error(struct net *net, struct nf_conn *tmpl,
- struct sk_buff *skb,
- unsigned int dataoff,
- u8 pf, unsigned int hooknum)
+static bool udplite_error(struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state)
{
unsigned int udplen = skb->len - dataoff;
const struct udphdr *hdr;
@@ -96,80 +141,67 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (!hdr) {
- udplite_error_log(skb, net, pf, "short packet");
- return -NF_ACCEPT;
+ udplite_error_log(skb, state, "short packet");
+ return true;
}
cscov = ntohs(hdr->len);
if (cscov == 0) {
cscov = udplen;
} else if (cscov < sizeof(*hdr) || cscov > udplen) {
- udplite_error_log(skb, net, pf, "invalid checksum coverage");
- return -NF_ACCEPT;
+ udplite_error_log(skb, state, "invalid checksum coverage");
+ return true;
}
/* UDPLITE mandates checksums */
if (!hdr->check) {
- udplite_error_log(skb, net, pf, "checksum missing");
- return -NF_ACCEPT;
+ udplite_error_log(skb, state, "checksum missing");
+ return true;
}
/* Checksum invalid? Ignore. */
- if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
- nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
- pf)) {
- udplite_error_log(skb, net, pf, "bad checksum");
- return -NF_ACCEPT;
+ if (state->hook == NF_INET_PRE_ROUTING &&
+ state->net->ct.sysctl_checksum &&
+ nf_checksum_partial(skb, state->hook, dataoff, cscov, IPPROTO_UDP,
+ state->pf)) {
+ udplite_error_log(skb, state, "bad checksum");
+ return true;
}
- return NF_ACCEPT;
-}
-#endif
-
-static void udp_error_log(const struct sk_buff *skb, struct net *net,
- u8 pf, const char *msg)
-{
- nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg);
+ return false;
}
-static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
- unsigned int dataoff,
- u_int8_t pf,
- unsigned int hooknum)
+/* Returns verdict for packet, and may modify conntracktype */
+static int udplite_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
- unsigned int udplen = skb->len - dataoff;
- const struct udphdr *hdr;
- struct udphdr _hdr;
-
- /* Header is too small? */
- hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
- if (hdr == NULL) {
- udp_error_log(skb, net, pf, "short packet");
- return -NF_ACCEPT;
- }
+ unsigned int *timeouts;
- /* Truncated/malformed packets */
- if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
- udp_error_log(skb, net, pf, "truncated/malformed packet");
+ if (udplite_error(skb, dataoff, state))
return -NF_ACCEPT;
- }
- /* Packet with no checksum */
- if (!hdr->check)
- return NF_ACCEPT;
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts)
+ timeouts = udp_get_timeouts(nf_ct_net(ct));
- /* Checksum invalid? Ignore.
- * We skip checking packets on the outgoing path
- * because the checksum is assumed to be correct.
- * FIXME: Source route IP option packets --RR */
- if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
- nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
- udp_error_log(skb, net, pf, "bad checksum");
- return -NF_ACCEPT;
+ /* If we've seen traffic both ways, this is some kind of UDP
+ stream. Extend timeout. */
+ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_refresh_acct(ct, ctinfo, skb,
+ timeouts[UDP_CT_REPLIED]);
+ /* Also, more likely to be important, and not a probe */
+ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
+ } else {
+ nf_ct_refresh_acct(ct, ctinfo, skb,
+ timeouts[UDP_CT_UNREPLIED]);
}
-
return NF_ACCEPT;
}
+#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
@@ -258,7 +290,7 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
return 0;
}
-static int udp_init_net(struct net *net, u_int16_t proto)
+static int udp_init_net(struct net *net)
{
struct nf_udp_net *un = udp_pernet(net);
struct nf_proto_net *pn = &un->pn;
@@ -278,72 +310,11 @@ static struct nf_proto_net *udp_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.udp.pn;
}
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
-{
- .l3proto = PF_INET,
- .l4proto = IPPROTO_UDP,
- .allow_clash = true,
- .packet = udp_packet,
- .new = udp_new,
- .error = udp_error,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
- .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
- .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
- .nla_policy = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- .ctnl_timeout = {
- .nlattr_to_obj = udp_timeout_nlattr_to_obj,
- .obj_to_nlattr = udp_timeout_obj_to_nlattr,
- .nlattr_max = CTA_TIMEOUT_UDP_MAX,
- .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
- .nla_policy = udp_timeout_nla_policy,
- },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = udp_init_net,
- .get_net_proto = udp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4);
-
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
-{
- .l3proto = PF_INET,
- .l4proto = IPPROTO_UDPLITE,
- .allow_clash = true,
- .packet = udp_packet,
- .new = udp_new,
- .error = udplite_error,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
- .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
- .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
- .nla_policy = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- .ctnl_timeout = {
- .nlattr_to_obj = udp_timeout_nlattr_to_obj,
- .obj_to_nlattr = udp_timeout_obj_to_nlattr,
- .nlattr_max = CTA_TIMEOUT_UDP_MAX,
- .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
- .nla_policy = udp_timeout_nla_policy,
- },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = udp_init_net,
- .get_net_proto = udp_get_net_proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4);
-#endif
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
{
- .l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
.allow_clash = true,
.packet = udp_packet,
- .new = udp_new,
- .error = udp_error,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
@@ -362,17 +333,13 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
.init_net = udp_init_net,
.get_net_proto = udp_get_net_proto,
};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6);
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite =
{
- .l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
.allow_clash = true,
- .packet = udp_packet,
- .new = udp_new,
- .error = udplite_error,
+ .packet = udplite_packet,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
@@ -391,5 +358,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
.init_net = udp_init_net,
.get_net_proto = udp_get_net_proto,
};
-EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
#endif
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 13279f683da9..463d17d349c1 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -292,7 +292,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (!net_eq(nf_ct_net(ct), net))
goto release;
- l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
WARN_ON(!l4proto);
ret = -ENOSPC;
@@ -720,10 +720,3 @@ static void __exit nf_conntrack_standalone_fini(void)
module_init(nf_conntrack_standalone_init);
module_exit(nf_conntrack_standalone_fini);
-
-/* Some modules need us, but don't depend directly on any symbol.
- They should call this. */
-void need_conntrack(void)
-{
-}
-EXPORT_SYMBOL_GPL(need_conntrack);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index d8125616edc7..185c633b6872 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -120,7 +120,7 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
if (l4num == IPPROTO_TCP)
flow_offload_fixup_tcp(&ct->proto.tcp);
- l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
+ l4proto = __nf_ct_l4proto_find(l4num);
if (!l4proto)
return;
@@ -233,8 +233,8 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload *flow;
int dir;
- tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
- nf_flow_offload_rhash_params);
+ tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
+ nf_flow_offload_rhash_params);
if (!tuplehash)
return NULL;
@@ -254,20 +254,17 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
struct flow_offload_tuple_rhash *tuplehash;
struct rhashtable_iter hti;
struct flow_offload *flow;
- int err;
-
- err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
- if (err)
- return err;
+ int err = 0;
+ rhashtable_walk_enter(&flow_table->rhashtable, &hti);
rhashtable_walk_start(&hti);
while ((tuplehash = rhashtable_walk_next(&hti))) {
if (IS_ERR(tuplehash)) {
- err = PTR_ERR(tuplehash);
- if (err != -EAGAIN)
- goto out;
-
+ if (PTR_ERR(tuplehash) != -EAGAIN) {
+ err = PTR_ERR(tuplehash);
+ break;
+ }
continue;
}
if (tuplehash->tuple.dir)
@@ -277,7 +274,6 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
iter(flow, data);
}
-out:
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
@@ -290,25 +286,19 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}
-static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
+static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
{
struct flow_offload_tuple_rhash *tuplehash;
struct rhashtable_iter hti;
struct flow_offload *flow;
- int err;
-
- err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
- if (err)
- return 0;
+ rhashtable_walk_enter(&flow_table->rhashtable, &hti);
rhashtable_walk_start(&hti);
while ((tuplehash = rhashtable_walk_next(&hti))) {
if (IS_ERR(tuplehash)) {
- err = PTR_ERR(tuplehash);
- if (err != -EAGAIN)
- goto out;
-
+ if (PTR_ERR(tuplehash) != -EAGAIN)
+ break;
continue;
}
if (tuplehash->tuple.dir)
@@ -321,11 +311,8 @@ static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
FLOW_OFFLOAD_TEARDOWN)))
flow_offload_del(flow_table, flow);
}
-out:
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
-
- return 1;
}
static void nf_flow_offload_work_gc(struct work_struct *work)
@@ -514,7 +501,7 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
mutex_unlock(&flowtable_lock);
cancel_delayed_work_sync(&flow_table->gc_work);
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
- WARN_ON(!nf_flow_offload_gc_step(flow_table));
+ nf_flow_offload_gc_step(flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 15ed91309992..1d291a51cd45 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -254,8 +254,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
return NF_ACCEPT;
- if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
- nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
+ if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
return NF_DROP;
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
@@ -471,8 +470,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (skb_try_make_writable(skb, sizeof(*ip6h)))
return NF_DROP;
- if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
- nf_flow_nat_ipv6(flow, skb, dir) < 0)
+ if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
return NF_DROP;
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 99606baedda4..38793b95d9bc 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -37,7 +37,7 @@ static void mangle_contents(struct sk_buff *skb,
{
unsigned char *data;
- BUG_ON(skb_is_nonlinear(skb));
+ SKB_LINEAR_ASSERT(skb);
data = skb_network_header(skb) + dataoff;
/* move post-replacement */
@@ -110,8 +110,6 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
!enlarge_skb(skb, rep_len - match_len))
return false;
- SKB_LINEAR_ASSERT(skb);
-
tcph = (void *)skb->data + protoff;
oldlen = skb->len - protoff;
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index adee04af8d43..78a9e6454ff3 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -52,13 +52,11 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
newdst = 0;
- rcu_read_lock();
indev = __in_dev_get_rcu(skb->dev);
if (indev && indev->ifa_list) {
ifa = indev->ifa_list;
newdst = ifa->ifa_local;
}
- rcu_read_unlock();
if (!newdst)
return NF_DROP;
@@ -97,7 +95,6 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
struct inet6_ifaddr *ifa;
bool addr = false;
- rcu_read_lock();
idev = __in6_dev_get(skb->dev);
if (idev != NULL) {
read_lock_bh(&idev->lock);
@@ -108,7 +105,6 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
}
read_unlock_bh(&idev->lock);
}
- rcu_read_unlock();
if (!addr)
return NF_DROP;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2cfb173cd0b2..f0159eea2978 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -27,6 +27,8 @@
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
+static LIST_HEAD(nf_tables_destroy_list);
+static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
static u64 table_handle;
enum {
@@ -64,6 +66,8 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state)
net->nft.validate_state = new_validate_state;
}
+static void nf_tables_trans_destroy_work(struct work_struct *w);
+static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
@@ -207,6 +211,18 @@ static int nft_delchain(struct nft_ctx *ctx)
return err;
}
+/* either expr ops provide both activate/deactivate, or neither */
+static bool nft_expr_check_ops(const struct nft_expr_ops *ops)
+{
+ if (!ops)
+ return true;
+
+ if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate)))
+ return false;
+
+ return true;
+}
+
static void nft_rule_expr_activate(const struct nft_ctx *ctx,
struct nft_rule *rule)
{
@@ -298,7 +314,7 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
return 0;
}
-static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
struct nft_set *set)
{
struct nft_trans *trans;
@@ -318,7 +334,7 @@ static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
return 0;
}
-static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
+static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -1005,7 +1021,8 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
- BUG_ON(ctx->table->use > 0);
+ if (WARN_ON(ctx->table->use > 0))
+ return;
rhltable_destroy(&ctx->table->chains_ht);
kfree(ctx->table->name);
@@ -1412,7 +1429,8 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
{
struct nft_chain *chain = ctx->chain;
- BUG_ON(chain->use > 0);
+ if (WARN_ON(chain->use > 0))
+ return;
/* no concurrent access possible anymore */
nf_tables_chain_free_chain_rules(chain);
@@ -1907,6 +1925,9 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
*/
int nft_register_expr(struct nft_expr_type *type)
{
+ if (!nft_expr_check_ops(type->ops))
+ return -EINVAL;
+
nfnl_lock(NFNL_SUBSYS_NFTABLES);
if (type->family == NFPROTO_UNSPEC)
list_add_tail_rcu(&type->list, &nf_tables_expressions);
@@ -2054,6 +2075,10 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
err = PTR_ERR(ops);
goto err1;
}
+ if (!nft_expr_check_ops(ops)) {
+ err = -EINVAL;
+ goto err1;
+ }
} else
ops = type->ops;
@@ -2434,7 +2459,6 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
{
struct nft_expr *expr;
- lockdep_assert_held(&ctx->net->nft.commit_mutex);
/*
* Careful: some expressions might not be initialized in case this
* is called on error from nf_tables_newrule().
@@ -3567,13 +3591,6 @@ static void nft_set_destroy(struct nft_set *set)
kvfree(set);
}
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
-{
- list_del_rcu(&set->list);
- nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
- nft_set_destroy(set);
-}
-
static int nf_tables_delset(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -3668,17 +3685,38 @@ bind:
}
EXPORT_SYMBOL_GPL(nf_tables_bind_set);
-void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding)
{
+ if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
+ nft_is_active(ctx->net, set))
+ list_add_tail_rcu(&set->list, &ctx->table->sets);
+
+ list_add_tail_rcu(&binding->list, &set->bindings);
+}
+EXPORT_SYMBOL_GPL(nf_tables_rebind_set);
+
+void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding)
+{
list_del_rcu(&binding->list);
if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
nft_is_active(ctx->net, set))
- nf_tables_set_destroy(ctx, set);
+ list_del_rcu(&set->list);
}
EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
+void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ if (list_empty(&set->bindings) && nft_set_is_anonymous(set) &&
+ nft_is_active(ctx->net, set)) {
+ nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
+ nft_set_destroy(set);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
+
const struct nft_set_ext_type nft_set_ext_types[] = {
[NFT_SET_EXT_KEY] = {
.align = __alignof__(u32),
@@ -6191,19 +6229,28 @@ static void nft_commit_release(struct nft_trans *trans)
nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
break;
}
+
+ if (trans->put_net)
+ put_net(trans->ctx.net);
+
kfree(trans);
}
-static void nf_tables_commit_release(struct net *net)
+static void nf_tables_trans_destroy_work(struct work_struct *w)
{
struct nft_trans *trans, *next;
+ LIST_HEAD(head);
- if (list_empty(&net->nft.commit_list))
+ spin_lock(&nf_tables_destroy_list_lock);
+ list_splice_init(&nf_tables_destroy_list, &head);
+ spin_unlock(&nf_tables_destroy_list_lock);
+
+ if (list_empty(&head))
return;
synchronize_rcu();
- list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_for_each_entry_safe(trans, next, &head, list) {
list_del(&trans->list);
nft_commit_release(trans);
}
@@ -6334,6 +6381,37 @@ static void nft_chain_del(struct nft_chain *chain)
list_del_rcu(&chain->list);
}
+static void nf_tables_commit_release(struct net *net)
+{
+ struct nft_trans *trans;
+
+ /* all side effects have to be made visible.
+ * For example, if a chain named 'foo' has been deleted, a
+ * new transaction must not find it anymore.
+ *
+ * Memory reclaim happens asynchronously from work queue
+ * to prevent expensive synchronize_rcu() in commit phase.
+ */
+ if (list_empty(&net->nft.commit_list)) {
+ mutex_unlock(&net->nft.commit_mutex);
+ return;
+ }
+
+ trans = list_last_entry(&net->nft.commit_list,
+ struct nft_trans, list);
+ get_net(trans->ctx.net);
+ WARN_ON_ONCE(trans->put_net);
+
+ trans->put_net = true;
+ spin_lock(&nf_tables_destroy_list_lock);
+ list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list);
+ spin_unlock(&nf_tables_destroy_list_lock);
+
+ mutex_unlock(&net->nft.commit_mutex);
+
+ schedule_work(&trans_destroy_work);
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nft_trans *trans, *next;
@@ -6495,9 +6573,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
}
- nf_tables_commit_release(net);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
- mutex_unlock(&net->nft.commit_mutex);
+ nf_tables_commit_release(net);
return 0;
}
@@ -7168,7 +7245,8 @@ int __nft_release_basechain(struct nft_ctx *ctx)
{
struct nft_rule *rule, *nr;
- BUG_ON(!nft_is_base_chain(ctx->chain));
+ if (WARN_ON(!nft_is_base_chain(ctx->chain)))
+ return 0;
nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
@@ -7271,6 +7349,7 @@ static int __init nf_tables_module_init(void)
{
int err;
+ spin_lock_init(&nf_tables_destroy_list_lock);
err = register_pernet_subsys(&nf_tables_net_ops);
if (err < 0)
return err;
@@ -7310,6 +7389,7 @@ static void __exit nf_tables_module_exit(void)
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
unregister_pernet_subsys(&nf_tables_net_ops);
+ cancel_work_sync(&trans_destroy_work);
rcu_barrier();
nf_tables_core_module_exit();
}
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index ffd5c0f9412b..3fbce3b9c5ec 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -249,12 +249,24 @@ static struct nft_expr_type *nft_basic_types[] = {
&nft_exthdr_type,
};
+static struct nft_object_type *nft_basic_objects[] = {
+#ifdef CONFIG_NETWORK_SECMARK
+ &nft_secmark_obj_type,
+#endif
+};
+
int __init nf_tables_core_module_init(void)
{
- int err, i;
+ int err, i, j = 0;
+
+ for (i = 0; i < ARRAY_SIZE(nft_basic_objects); i++) {
+ err = nft_register_obj(nft_basic_objects[i]);
+ if (err)
+ goto err;
+ }
- for (i = 0; i < ARRAY_SIZE(nft_basic_types); i++) {
- err = nft_register_expr(nft_basic_types[i]);
+ for (j = 0; j < ARRAY_SIZE(nft_basic_types); j++) {
+ err = nft_register_expr(nft_basic_types[j]);
if (err)
goto err;
}
@@ -262,8 +274,12 @@ int __init nf_tables_core_module_init(void)
return 0;
err:
+ while (j-- > 0)
+ nft_unregister_expr(nft_basic_types[j]);
+
while (i-- > 0)
- nft_unregister_expr(nft_basic_types[i]);
+ nft_unregister_obj(nft_basic_objects[i]);
+
return err;
}
@@ -274,4 +290,8 @@ void nf_tables_core_module_exit(void)
i = ARRAY_SIZE(nft_basic_types);
while (i-- > 0)
nft_unregister_expr(nft_basic_types[i]);
+
+ i = ARRAY_SIZE(nft_basic_objects);
+ while (i-- > 0)
+ nft_unregister_obj(nft_basic_objects[i]);
}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index a30f8ba4b89a..b48545b84ce8 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -53,9 +53,6 @@ ctnl_timeout_parse_policy(void *timeout,
struct nlattr **tb;
int ret = 0;
- if (!l4proto->ctnl_timeout.nlattr_to_obj)
- return 0;
-
tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
GFP_KERNEL);
@@ -125,7 +122,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
return -EBUSY;
}
- l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+ l4proto = nf_ct_l4proto_find_get(l4num);
/* This protocol is not supportted, skip. */
if (l4proto->l4proto != l4num) {
@@ -167,6 +164,8 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto;
+ struct nlattr *nest_parms;
+ int ret;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -186,22 +185,15 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
htonl(refcount_read(&timeout->refcnt))))
goto nla_put_failure;
- if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
- struct nlattr *nest_parms;
- int ret;
-
- nest_parms = nla_nest_start(skb,
- CTA_TIMEOUT_DATA | NLA_F_NESTED);
- if (!nest_parms)
- goto nla_put_failure;
+ nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
- ret = l4proto->ctnl_timeout.obj_to_nlattr(skb,
- &timeout->timeout.data);
- if (ret < 0)
- goto nla_put_failure;
+ ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->timeout.data);
+ if (ret < 0)
+ goto nla_put_failure;
- nla_nest_end(skb, nest_parms);
- }
+ nla_nest_end(skb, nest_parms);
nlmsg_end(skb, nlh);
return skb->len;
@@ -369,7 +361,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
- l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+ l4proto = nf_ct_l4proto_find_get(l4num);
/* This protocol is not supported, skip. */
if (l4proto->l4proto != l4num) {
@@ -391,12 +383,14 @@ err:
static int
cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
- u32 seq, u32 type, int event,
+ u32 seq, u32 type, int event, u16 l3num,
const struct nf_conntrack_l4proto *l4proto)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
+ struct nlattr *nest_parms;
+ int ret;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -408,25 +402,19 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) ||
+ if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l3num)) ||
nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
goto nla_put_failure;
- if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
- struct nlattr *nest_parms;
- int ret;
-
- nest_parms = nla_nest_start(skb,
- CTA_TIMEOUT_DATA | NLA_F_NESTED);
- if (!nest_parms)
- goto nla_put_failure;
+ nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
- ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
- if (ret < 0)
- goto nla_put_failure;
+ ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
+ if (ret < 0)
+ goto nla_put_failure;
- nla_nest_end(skb, nest_parms);
- }
+ nla_nest_end(skb, nest_parms);
nlmsg_end(skb, nlh);
return skb->len;
@@ -454,7 +442,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
- l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+ l4proto = nf_ct_l4proto_find_get(l4num);
/* This protocol is not supported, skip. */
if (l4proto->l4proto != l4num) {
@@ -472,6 +460,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type),
IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
+ l3num,
l4proto);
if (ret <= 0) {
kfree_skb(skb2);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index d33094f4ec41..43041f087eb3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -765,7 +765,7 @@ __nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
return ret;
}
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
entry_seg = nf_queue_entry_dup(entry);
if (entry_seg) {
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index fa90a8402845..79d48c1d06f4 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -79,7 +79,8 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
tb[NFTA_CMP_DATA]);
- BUG_ON(err < 0);
+ if (err < 0)
+ return err;
priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
err = nft_validate_register_load(priv->sreg, desc.len);
@@ -129,7 +130,8 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
err = nft_data_init(NULL, &data, sizeof(data), &desc,
tb[NFTA_CMP_DATA]);
- BUG_ON(err < 0);
+ if (err < 0)
+ return err;
priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
err = nft_validate_register_load(priv->sreg, desc.len);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 5dd87748afa8..586627c361df 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -279,7 +279,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
{
const struct nft_ct *priv = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
-#ifdef CONFIG_NF_CONNTRACK_MARK
+#if defined(CONFIG_NF_CONNTRACK_MARK) || defined(CONFIG_NF_CONNTRACK_SECMARK)
u32 value = regs->data[priv->sreg];
#endif
enum ip_conntrack_info ctinfo;
@@ -298,6 +298,14 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
}
break;
#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ case NFT_CT_SECMARK:
+ if (ct->secmark != value) {
+ ct->secmark = value;
+ nf_conntrack_event_cache(IPCT_SECMARK, ct);
+ }
+ break;
+#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS:
nf_connlabels_replace(ct,
@@ -565,6 +573,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
len = sizeof(u32);
break;
#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ case NFT_CT_SECMARK:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+ len = sizeof(u32);
+ break;
+#endif
default:
return -EOPNOTSUPP;
}
@@ -776,9 +791,6 @@ nft_ct_timeout_parse_policy(void *timeouts,
struct nlattr **tb;
int ret = 0;
- if (!l4proto->ctnl_timeout.nlattr_to_obj)
- return 0;
-
tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
GFP_KERNEL);
@@ -858,7 +870,7 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
priv->l4proto = l4num;
- l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+ l4proto = nf_ct_l4proto_find_get(l4num);
if (l4proto->l4proto != l4num) {
ret = -EOPNOTSUPP;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 6e91a37d57f2..07d4efd3d851 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -235,14 +235,31 @@ err1:
return err;
}
+/* Activate hook: hand this expression's set binding back to the set via
+ * nf_tables_rebind_set().
+ */
+static void nft_dynset_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr)
+{
+	struct nft_dynset *dynset;
+
+	dynset = nft_expr_priv(expr);
+	nf_tables_rebind_set(ctx, dynset->set, &dynset->binding);
+}
+
+/* Deactivate hook: detach this expression's binding from its set via
+ * nf_tables_unbind_set().
+ */
+static void nft_dynset_deactivate(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr)
+{
+	struct nft_dynset *dynset;
+
+	dynset = nft_expr_priv(expr);
+	nf_tables_unbind_set(ctx, dynset->set, &dynset->binding);
+}
+
static void nft_dynset_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_dynset *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding);
if (priv->expr != NULL)
nft_expr_destroy(ctx, priv->expr);
+
+ nf_tables_destroy_set(ctx, priv->set);
}
static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -279,6 +296,8 @@ static const struct nft_expr_ops nft_dynset_ops = {
.eval = nft_dynset_eval,
.init = nft_dynset_init,
.destroy = nft_dynset_destroy,
+ .activate = nft_dynset_activate,
+ .deactivate = nft_dynset_deactivate,
.dump = nft_dynset_dump,
};
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index ad13e8643599..227b2b15a19c 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -121,12 +121,28 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return 0;
}
+/* Activate hook: hand this expression's set binding back to the set via
+ * nf_tables_rebind_set().
+ */
+static void nft_lookup_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr)
+{
+	struct nft_lookup *lookup;
+
+	lookup = nft_expr_priv(expr);
+	nf_tables_rebind_set(ctx, lookup->set, &lookup->binding);
+}
+
+/* Deactivate hook: detach this expression's binding from its set via
+ * nf_tables_unbind_set().
+ */
+static void nft_lookup_deactivate(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr)
+{
+	struct nft_lookup *lookup;
+
+	lookup = nft_expr_priv(expr);
+	nf_tables_unbind_set(ctx, lookup->set, &lookup->binding);
+}
+
static void nft_lookup_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_lookup *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+ nf_tables_destroy_set(ctx, priv->set);
}
static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -209,6 +225,8 @@ static const struct nft_expr_ops nft_lookup_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
.eval = nft_lookup_eval,
.init = nft_lookup_init,
+ .activate = nft_lookup_activate,
+ .deactivate = nft_lookup_deactivate,
.destroy = nft_lookup_destroy,
.dump = nft_lookup_dump,
.validate = nft_lookup_validate,
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 297fe7d97c18..6180626c3f80 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -284,6 +284,11 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
skb->nf_trace = !!value8;
break;
+#ifdef CONFIG_NETWORK_SECMARK
+ case NFT_META_SECMARK:
+ skb->secmark = value;
+ break;
+#endif
default:
WARN_ON(1);
}
@@ -436,6 +441,9 @@ static int nft_meta_set_init(const struct nft_ctx *ctx,
switch (priv->key) {
case NFT_META_MARK:
case NFT_META_PRIORITY:
+#ifdef CONFIG_NETWORK_SECMARK
+ case NFT_META_SECMARK:
+#endif
len = sizeof(u32);
break;
case NFT_META_NFTRACE:
@@ -543,3 +551,111 @@ struct nft_expr_type nft_meta_type __read_mostly = {
.maxattr = NFTA_META_MAX,
.owner = THIS_MODULE,
};
+
+#ifdef CONFIG_NETWORK_SECMARK
+/* Private data of a secmark object. */
+struct nft_secmark {
+ /* Set by nft_secmark_compute_secid(); copied into skb->secmark by
+  * nft_secmark_obj_eval().
+  */
+ u32 secid;
+ /* Security context string: nla_strdup()'d in nft_secmark_obj_init(),
+  * kfree()'d in nft_secmark_obj_destroy().
+  */
+ char *ctx;
+};
+
+/* Netlink attribute policy for secmark objects: NFTA_SECMARK_CTX is a
+ * string attribute bounded by NFT_SECMARK_CTX_MAXLEN.
+ */
+static const struct nla_policy nft_secmark_policy[NFTA_SECMARK_MAX + 1] = {
+ [NFTA_SECMARK_CTX] = { .type = NLA_STRING, .len = NFT_SECMARK_CTX_MAXLEN },
+};
+
+/*
+ * Translate priv->ctx into a secid and store it in priv->secid.
+ *
+ * Returns 0 on success, the error reported by security_secctx_to_secid()
+ * or security_secmark_relabel_packet(), or -ENOENT when the translation
+ * yields secid 0.  priv->secid is only written on success.
+ */
+static int nft_secmark_compute_secid(struct nft_secmark *priv)
+{
+	u32 secid = 0;
+	int ret;
+
+	ret = security_secctx_to_secid(priv->ctx, strlen(priv->ctx), &secid);
+	if (ret != 0)
+		return ret;
+	if (secid == 0)
+		return -ENOENT;
+
+	ret = security_secmark_relabel_packet(secid);
+	if (ret != 0)
+		return ret;
+
+	priv->secid = secid;
+	return 0;
+}
+
+/* Eval hook: stamp the packet's skb with the secid stored in the object. */
+static void nft_secmark_obj_eval(struct nft_object *obj, struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt)
+{
+	const struct nft_secmark *secmark = nft_obj_data(obj);
+
+	pkt->skb->secmark = secmark->secid;
+}
+
+/*
+ * Init hook for secmark objects.
+ *
+ * Duplicates the mandatory NFTA_SECMARK_CTX string into priv->ctx, resolves
+ * it to a secid and, on success, calls security_secmark_refcount_inc().
+ * Returns -EINVAL when the attribute is missing, -ENOMEM when the copy
+ * fails, or the error from nft_secmark_compute_secid() (after freeing the
+ * copied string).
+ */
+static int nft_secmark_obj_init(const struct nft_ctx *ctx,
+				const struct nlattr * const tb[],
+				struct nft_object *obj)
+{
+	struct nft_secmark *secmark = nft_obj_data(obj);
+	int ret;
+
+	if (!tb[NFTA_SECMARK_CTX])
+		return -EINVAL;
+
+	secmark->ctx = nla_strdup(tb[NFTA_SECMARK_CTX], GFP_KERNEL);
+	if (secmark->ctx == NULL)
+		return -ENOMEM;
+
+	ret = nft_secmark_compute_secid(secmark);
+	if (ret)
+		goto err_free_ctx;
+
+	security_secmark_refcount_inc();
+	return 0;
+
+err_free_ctx:
+	kfree(secmark->ctx);
+	return ret;
+}
+
+/*
+ * Dump hook: emit the context string as NFTA_SECMARK_CTX.
+ *
+ * Returns -1 when the attribute cannot be added.  When @reset is set the
+ * secid is recomputed from the stored context and the result of that
+ * computation is returned; otherwise returns 0.
+ */
+static int nft_secmark_obj_dump(struct sk_buff *skb, struct nft_object *obj,
+				bool reset)
+{
+	struct nft_secmark *secmark = nft_obj_data(obj);
+
+	if (nla_put_string(skb, NFTA_SECMARK_CTX, secmark->ctx))
+		return -1;
+
+	if (!reset)
+		return 0;
+
+	return nft_secmark_compute_secid(secmark);
+}
+
+/* Destroy hook: undo what nft_secmark_obj_init() set up - call
+ * security_secmark_refcount_dec() and free the copied context string.
+ */
+static void nft_secmark_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
+{
+	struct nft_secmark *secmark;
+
+	secmark = nft_obj_data(obj);
+	security_secmark_refcount_dec();
+	kfree(secmark->ctx);
+}
+
+/* Callback table for secmark objects; .size is the size of the private
+ * struct nft_secmark area.
+ */
+static const struct nft_object_ops nft_secmark_obj_ops = {
+ .type = &nft_secmark_obj_type,
+ .size = sizeof(struct nft_secmark),
+ .init = nft_secmark_obj_init,
+ .eval = nft_secmark_obj_eval,
+ .dump = nft_secmark_obj_dump,
+ .destroy = nft_secmark_obj_destroy,
+};
+/* Object type descriptor for NFT_OBJECT_SECMARK, tying together the ops
+ * table and the netlink attribute policy defined above.
+ */
+struct nft_object_type nft_secmark_obj_type __read_mostly = {
+ .type = NFT_OBJECT_SECMARK,
+ .ops = &nft_secmark_obj_ops,
+ .maxattr = NFTA_SECMARK_MAX,
+ .policy = nft_secmark_policy,
+ .owner = THIS_MODULE,
+};
+#endif /* CONFIG_NETWORK_SECMARK */
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index cdf348f751ec..a3185ca2a3a9 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -155,12 +155,28 @@ nla_put_failure:
return -1;
}
+/* Activate hook: hand this expression's set binding back to the set via
+ * nf_tables_rebind_set().
+ */
+static void nft_objref_map_activate(const struct nft_ctx *ctx,
+				    const struct nft_expr *expr)
+{
+	struct nft_objref_map *map;
+
+	map = nft_expr_priv(expr);
+	nf_tables_rebind_set(ctx, map->set, &map->binding);
+}
+
+/* Deactivate hook: detach this expression's binding from its set via
+ * nf_tables_unbind_set().
+ */
+static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
+				      const struct nft_expr *expr)
+{
+	struct nft_objref_map *map;
+
+	map = nft_expr_priv(expr);
+	nf_tables_unbind_set(ctx, map->set, &map->binding);
+}
+
static void nft_objref_map_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
struct nft_objref_map *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+ nf_tables_destroy_set(ctx, priv->set);
}
static struct nft_expr_type nft_objref_type;
@@ -169,6 +185,8 @@ static const struct nft_expr_ops nft_objref_map_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
.eval = nft_objref_map_eval,
.init = nft_objref_map_init,
+ .activate = nft_objref_map_activate,
+ .deactivate = nft_objref_map_deactivate,
.destroy = nft_objref_map_destroy,
.dump = nft_objref_map_dump,
};
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 29f5bd2377b0..b48e58cceeb7 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -94,7 +94,8 @@ static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = {
int nft_reject_icmp_code(u8 code)
{
- BUG_ON(code > NFT_REJECT_ICMPX_MAX);
+ if (WARN_ON_ONCE(code > NFT_REJECT_ICMPX_MAX))
+ return ICMP_NET_UNREACH;
return icmp_code_v4[code];
}
@@ -111,7 +112,8 @@ static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = {
int nft_reject_icmpv6_code(u8 code)
{
- BUG_ON(code > NFT_REJECT_ICMPX_MAX);
+ if (WARN_ON_ONCE(code > NFT_REJECT_ICMPX_MAX))
+ return ICMPV6_NOROUTE;
return icmp_code_v6[code];
}
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 76dba9f6b6f6..f35fa33913ae 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -90,6 +90,11 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
case NFT_RT_TCPMSS:
nft_reg_store16(dest, get_tcpmss(pkt, dst));
break;
+#ifdef CONFIG_XFRM
+ case NFT_RT_XFRM:
+ nft_reg_store8(dest, !!dst->xfrm);
+ break;
+#endif
default:
WARN_ON(1);
goto err;
@@ -130,6 +135,11 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
case NFT_RT_TCPMSS:
len = sizeof(u16);
break;
+#ifdef CONFIG_XFRM
+ case NFT_RT_XFRM:
+ len = sizeof(u8);
+ break;
+#endif
default:
return -EOPNOTSUPP;
}
@@ -164,6 +174,7 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
case NFT_RT_NEXTHOP4:
case NFT_RT_NEXTHOP6:
case NFT_RT_CLASSID:
+ case NFT_RT_XFRM:
return 0;
case NFT_RT_TCPMSS:
hooks = (1 << NF_INET_FORWARD) |
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 015124e649cb..339a9dd1c832 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -88,7 +88,7 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
.key = key,
};
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+ he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
*ext = &he->ext;
@@ -106,7 +106,7 @@ static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
.key = elem->key.val.data,
};
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+ he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
return he;
@@ -129,7 +129,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
.key = key,
};
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+ he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
goto out;
@@ -217,7 +217,7 @@ static void *nft_rhash_deactivate(const struct net *net,
};
rcu_read_lock();
- he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
+ he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL &&
!nft_rhash_flush(net, set, he))
he = NULL;
@@ -244,21 +244,15 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_rhash_elem *he;
struct rhashtable_iter hti;
struct nft_set_elem elem;
- int err;
-
- err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC);
- iter->err = err;
- if (err)
- return;
+ rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
- err = PTR_ERR(he);
- if (err != -EAGAIN) {
- iter->err = err;
- goto out;
+ if (PTR_ERR(he) != -EAGAIN) {
+ iter->err = PTR_ERR(he);
+ break;
}
continue;
@@ -275,13 +269,11 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0)
- goto out;
+ break;
cont:
iter->count++;
}
-
-out:
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
}
@@ -293,21 +285,17 @@ static void nft_rhash_gc(struct work_struct *work)
struct nft_rhash *priv;
struct nft_set_gc_batch *gcb = NULL;
struct rhashtable_iter hti;
- int err;
priv = container_of(work, struct nft_rhash, gc_work.work);
set = nft_set_container_of(priv);
- err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
- if (err)
- goto schedule;
-
+ rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
if (PTR_ERR(he) != -EAGAIN)
- goto out;
+ break;
continue;
}
@@ -326,17 +314,15 @@ gc:
gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
if (gcb == NULL)
- goto out;
+ break;
rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
atomic_dec(&set->nelems);
nft_set_gc_batch_add(gcb, he);
}
-out:
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
nft_set_gc_batch_complete(gcb);
-schedule:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
new file mode 100644
index 000000000000..3cf71a2e375b
--- /dev/null
+++ b/net/netfilter/nft_xfrm.c
@@ -0,0 +1,293 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Generic part shared by ipv4 and ipv6 backends.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <linux/in.h>
+#include <net/xfrm.h>
+
+/* Netlink attribute policy for the xfrm expression: the expected type of
+ * each NFTA_XFRM_* attribute.
+ */
+static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = {
+ [NFTA_XFRM_KEY] = { .type = NLA_U32 },
+ [NFTA_XFRM_DIR] = { .type = NLA_U8 },
+ [NFTA_XFRM_SPNUM] = { .type = NLA_U32 },
+ [NFTA_XFRM_DREG] = { .type = NLA_U32 },
+};
+
+/* Per-expression private data, filled in by nft_xfrm_get_init(). */
+struct nft_xfrm {
+ /* NFT_XFRM_KEY_* attribute to load */
+ enum nft_xfrm_keys key:8;
+ /* destination register for the loaded value */
+ enum nft_registers dreg:8;
+ /* XFRM_POLICY_IN or XFRM_POLICY_OUT */
+ u8 dir;
+ /* index of the xfrm state to inspect; init rejects values that are
+  * not below XFRM_MAX_DEPTH
+  */
+ u8 spnum;
+};
+
+/*
+ * Parse and validate the netlink attributes of an xfrm expression.
+ *
+ * NFTA_XFRM_KEY, NFTA_XFRM_DIR and NFTA_XFRM_DREG are mandatory;
+ * NFTA_XFRM_SPNUM is optional and defaults to 0.
+ *
+ * Returns -EINVAL for a missing attribute or an unknown key/direction,
+ * -EOPNOTSUPP for families other than ipv4/ipv6/inet, -ERANGE when the
+ * state index is not below XFRM_MAX_DEPTH, and otherwise the result of
+ * validating the destination register against the size of the key.
+ */
+static int nft_xfrm_get_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr * const tb[])
+{
+	struct nft_xfrm *priv = nft_expr_priv(expr);
+	unsigned int len = 0;
+	u32 spnum = 0;
+	u32 key;
+	u8 dir;
+
+	if (!tb[NFTA_XFRM_KEY] || !tb[NFTA_XFRM_DIR] || !tb[NFTA_XFRM_DREG])
+		return -EINVAL;
+
+	switch (ctx->family) {
+	case NFPROTO_IPV4:
+	case NFPROTO_IPV6:
+	case NFPROTO_INET:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* Validate the full 32-bit value before it is narrowed into the
+	 * 8-bit priv->key field; otherwise an out-of-range key could alias
+	 * a valid one after truncation and be accepted.
+	 */
+	key = ntohl(nla_get_be32(tb[NFTA_XFRM_KEY]));
+	switch (key) {
+	case NFT_XFRM_KEY_REQID:
+	case NFT_XFRM_KEY_SPI:
+		len = sizeof(u32);
+		break;
+	case NFT_XFRM_KEY_DADDR_IP4:
+	case NFT_XFRM_KEY_SADDR_IP4:
+		len = sizeof(struct in_addr);
+		break;
+	case NFT_XFRM_KEY_DADDR_IP6:
+	case NFT_XFRM_KEY_SADDR_IP6:
+		len = sizeof(struct in6_addr);
+		break;
+	default:
+		return -EINVAL;
+	}
+	priv->key = key;
+
+	dir = nla_get_u8(tb[NFTA_XFRM_DIR]);
+	switch (dir) {
+	case XFRM_POLICY_IN:
+	case XFRM_POLICY_OUT:
+		priv->dir = dir;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (tb[NFTA_XFRM_SPNUM])
+		spnum = ntohl(nla_get_be32(tb[NFTA_XFRM_SPNUM]));
+
+	/* Range-check the 32-bit value before storing it in the u8 field. */
+	if (spnum >= XFRM_MAX_DEPTH)
+		return -ERANGE;
+
+	priv->spnum = spnum;
+
+	priv->dreg = nft_parse_register(tb[NFTA_XFRM_DREG]);
+	return nft_validate_register_store(ctx, priv->dreg, NULL,
+					   NFT_DATA_VALUE, len);
+}
+
+/* Decide whether key @k can be read from a state of the given family/mode.
+ *
+ * Keys that are not addresses are always fine.  Address keys need the
+ * matching address family and a mode that carries addresses of its own
+ * (BEET, TUNNEL).
+ */
+static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode)
+{
+	u8 wanted;
+
+	switch (k) {
+	case NFT_XFRM_KEY_DADDR_IP4:
+	case NFT_XFRM_KEY_SADDR_IP4:
+		wanted = NFPROTO_IPV4;
+		break;
+	case NFT_XFRM_KEY_DADDR_IP6:
+	case NFT_XFRM_KEY_SADDR_IP6:
+		wanted = NFPROTO_IPV6;
+		break;
+	default:
+		return true;
+	}
+
+	if (family != wanted)
+		return false;
+
+	switch (mode) {
+	case XFRM_MODE_BEET:
+	case XFRM_MODE_TUNNEL:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Load the attribute selected by priv->key from @state into the
+ * destination register.  Sets the verdict to NFT_BREAK when the key
+ * cannot be served by this state or is not a loadable key.
+ *
+ * @family is kept for the existing callers but is not consulted: the
+ * addresses live in the state, so address keys are checked against the
+ * state's own family (state->props.family).  With mixed-family tunnels
+ * the hook family can differ from it.
+ */
+static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
+				   struct nft_regs *regs,
+				   const struct xfrm_state *state,
+				   u8 family)
+{
+	u32 *dest = &regs->data[priv->dreg];
+
+	if (!xfrm_state_addr_ok(priv->key, state->props.family,
+				state->props.mode)) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	switch (priv->key) {
+	case NFT_XFRM_KEY_UNSPEC:
+	case __NFT_XFRM_KEY_MAX:
+		/* Rejected by nft_xfrm_get_init(); falls through to NFT_BREAK. */
+		WARN_ON_ONCE(1);
+		break;
+	case NFT_XFRM_KEY_DADDR_IP4:
+		/* Stored as-is (network byte order); cast is for sparse only. */
+		*dest = (__force u32)state->id.daddr.a4;
+		return;
+	case NFT_XFRM_KEY_DADDR_IP6:
+		memcpy(dest, &state->id.daddr.in6, sizeof(struct in6_addr));
+		return;
+	case NFT_XFRM_KEY_SADDR_IP4:
+		*dest = (__force u32)state->props.saddr.a4;
+		return;
+	case NFT_XFRM_KEY_SADDR_IP6:
+		memcpy(dest, &state->props.saddr.in6, sizeof(struct in6_addr));
+		return;
+	case NFT_XFRM_KEY_REQID:
+		*dest = state->props.reqid;
+		return;
+	case NFT_XFRM_KEY_SPI:
+		*dest = (__force u32)state->id.spi;
+		return;
+	}
+
+	regs->verdict.code = NFT_BREAK;
+}
+
+/* Inbound lookup: take state number priv->spnum from the skb's sec_path.
+ * Sets NFT_BREAK when there is no sec_path or it holds too few states.
+ */
+static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt)
+{
+	const struct sec_path *path = pkt->skb->sp;
+
+	if (path != NULL && path->len > priv->spnum) {
+		nft_xfrm_state_get_key(priv, regs, path->xvec[priv->spnum],
+				       nft_pf(pkt));
+		return;
+	}
+
+	regs->verdict.code = NFT_BREAK;
+}
+
+/* Outbound lookup: follow the skb's dst chain through ->child while each
+ * entry has an xfrm state, and use the entry at depth priv->spnum.
+ * Sets NFT_BREAK when the chain ends before that depth is reached.
+ */
+static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv,
+				  struct nft_regs *regs,
+				  const struct nft_pktinfo *pkt)
+{
+	const struct dst_entry *cur = skb_dst(pkt->skb);
+	int depth = 0;
+
+	while (cur && cur->xfrm) {
+		if (depth >= priv->spnum) {
+			nft_xfrm_state_get_key(priv, regs, cur->xfrm,
+					       nft_pf(pkt));
+			return;
+		}
+
+		cur = ((const struct xfrm_dst *)cur)->child;
+		depth++;
+	}
+
+	regs->verdict.code = NFT_BREAK;
+}
+
+/* Eval hook: dispatch on the configured direction.  Any direction other
+ * than IN/OUT triggers a one-time warning and NFT_BREAK.
+ */
+static void nft_xfrm_get_eval(const struct nft_expr *expr,
+			      struct nft_regs *regs,
+			      const struct nft_pktinfo *pkt)
+{
+	const struct nft_xfrm *xfrm = nft_expr_priv(expr);
+
+	if (xfrm->dir == XFRM_POLICY_IN) {
+		nft_xfrm_get_eval_in(xfrm, regs, pkt);
+	} else if (xfrm->dir == XFRM_POLICY_OUT) {
+		nft_xfrm_get_eval_out(xfrm, regs, pkt);
+	} else {
+		WARN_ON_ONCE(1);
+		regs->verdict.code = NFT_BREAK;
+	}
+}
+
+/* Dump hook: emit DREG, KEY, DIR and SPNUM, in that order.
+ * Returns 0 on success or -1 as soon as one attribute does not fit.
+ */
+static int nft_xfrm_get_dump(struct sk_buff *skb,
+			     const struct nft_expr *expr)
+{
+	const struct nft_xfrm *xfrm = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_XFRM_DREG, xfrm->dreg) ||
+	    nla_put_be32(skb, NFTA_XFRM_KEY, htonl(xfrm->key)) ||
+	    nla_put_u8(skb, NFTA_XFRM_DIR, xfrm->dir) ||
+	    nla_put_be32(skb, NFTA_XFRM_SPNUM, htonl(xfrm->spnum)))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/* Validate hook: restrict the expression to the hooks that make sense for
+ * its direction.  FORWARD is allowed for both; IN adds LOCAL_IN and
+ * PRE_ROUTING, OUT adds LOCAL_OUT and POST_ROUTING.  An unexpected
+ * direction warns once and yields -EINVAL.
+ */
+static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			     const struct nft_data **data)
+{
+	const struct nft_xfrm *xfrm = nft_expr_priv(expr);
+	unsigned int allowed = 1 << NF_INET_FORWARD;
+
+	switch (xfrm->dir) {
+	case XFRM_POLICY_IN:
+		allowed |= (1 << NF_INET_LOCAL_IN) |
+			   (1 << NF_INET_PRE_ROUTING);
+		break;
+	case XFRM_POLICY_OUT:
+		allowed |= (1 << NF_INET_LOCAL_OUT) |
+			   (1 << NF_INET_POST_ROUTING);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+
+	return nft_chain_validate_hooks(ctx->chain, allowed);
+}
+
+
+/* Forward declaration: the ops table and the type descriptor point at
+ * each other.
+ */
+static struct nft_expr_type nft_xfrm_type;
+/* Callback table for the xfrm expression; .size covers the private
+ * struct nft_xfrm area.
+ */
+static const struct nft_expr_ops nft_xfrm_get_ops = {
+ .type = &nft_xfrm_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_xfrm)),
+ .eval = nft_xfrm_get_eval,
+ .init = nft_xfrm_get_init,
+ .dump = nft_xfrm_get_dump,
+ .validate = nft_xfrm_validate,
+};
+
+/* Expression type descriptor registered under the name "xfrm". */
+static struct nft_expr_type nft_xfrm_type __read_mostly = {
+ .name = "xfrm",
+ .ops = &nft_xfrm_get_ops,
+ .policy = nft_xfrm_policy,
+ .maxattr = NFTA_XFRM_MAX,
+ .owner = THIS_MODULE,
+};
+
+/* Module entry point: register the "xfrm" expression type and return the
+ * result of nft_register_expr().
+ */
+static int __init nft_xfrm_module_init(void)
+{
+ return nft_register_expr(&nft_xfrm_type);
+}
+
+/* Module exit point: unregister the "xfrm" expression type. */
+static void __exit nft_xfrm_module_exit(void)
+{
+ nft_unregister_expr(&nft_xfrm_type);
+}
+
+module_init(nft_xfrm_module_init);
+module_exit(nft_xfrm_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("nf_tables: xfrm/IPSec matching");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_AUTHOR("Máté Eckl <ecklm94@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("xfrm");
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 89457efd2e00..2c7a4b80206f 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -159,7 +159,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
/* Make sure the timeout policy matches any existing protocol tracker,
* otherwise default to generic.
*/
- l4proto = __nf_ct_l4proto_find(par->family, proto);
+ l4proto = __nf_ct_l4proto_find(proto);
if (timeout->l4proto->l4proto != l4proto->l4proto) {
ret = -EINVAL;
pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 5ee859193783..c6acfc2d9c84 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -68,8 +68,6 @@ struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
{
struct idletimer_tg *entry;
- BUG_ON(!label);
-
list_for_each_entry(entry, &idletimer_tg_list, entry) {
if (!strcmp(label, entry->attr.attr.name))
return entry;
@@ -172,8 +170,6 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb,
pr_debug("resetting timer %s, timeout period %u\n",
info->label, info->timeout);
- BUG_ON(!info->timer);
-
mod_timer(&info->timer->timer,
msecs_to_jiffies(info->timeout * 1000) + jiffies);
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 4ad5fe27e08b..f16202d26c20 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -35,8 +35,6 @@ secmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
u32 secmark = 0;
const struct xt_secmark_target_info *info = par->targinfo;
- BUG_ON(info->mode != mode);
-
switch (mode) {
case SECMARK_MODE_SEL:
secmark = info->secid;
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 5d92e1781980..5cb1ecb29ea4 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -68,6 +68,38 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
return 0;
}
+/*
+ * checkentry hook for revision 2 of the cgroup match.
+ *
+ * Rejects (-EINVAL) invert flags with bits other than bit 0 set, rules
+ * that specify neither or both of path and classid, and paths that
+ * cgroup_get_from_path() cannot resolve.  For a path rule the resolved
+ * cgroup is stored in info->priv (released again by cgroup_mt_destroy_v2());
+ * for a classid rule info->priv is left NULL.
+ */
+static int cgroup_mt_check_v2(const struct xt_mtchk_param *par)
+{
+	struct xt_cgroup_info_v2 *info = par->matchinfo;
+	struct cgroup *cgrp;
+
+	if ((info->invert_path & ~1) || (info->invert_classid & ~1))
+		return -EINVAL;
+
+	/* All diagnostics here are reachable from rule insertion, so they
+	 * are rate limited like the other messages in this function.
+	 */
+	if (!info->has_path && !info->has_classid) {
+		pr_info_ratelimited("xt_cgroup: no path or classid specified\n");
+		return -EINVAL;
+	}
+
+	if (info->has_path && info->has_classid) {
+		pr_info_ratelimited("path and classid specified\n");
+		return -EINVAL;
+	}
+
+	info->priv = NULL;
+	if (info->has_path) {
+		cgrp = cgroup_get_from_path(info->path);
+		if (IS_ERR(cgrp)) {
+			pr_info_ratelimited("invalid path, errno=%ld\n",
+					    PTR_ERR(cgrp));
+			return -EINVAL;
+		}
+		info->priv = cgrp;
+	}
+
+	return 0;
+}
+
static bool
cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
@@ -99,6 +131,24 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
info->invert_classid;
}
+/*
+ * match hook for revision 2 of the cgroup match.
+ *
+ * Never matches when the skb has no socket, the socket is not a full
+ * socket, or it belongs to a different netns than the rule.  Otherwise
+ * matches on cgroup ancestry when info->priv holds a cgroup, and on the
+ * classid when it does not; each result is XORed with its invert flag.
+ */
+static bool cgroup_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_cgroup_info_v2 *info = par->matchinfo;
+	struct cgroup *ancestor = info->priv;
+	struct sock *sk = skb->sk;
+	struct sock_cgroup_data *skcd;
+
+	if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
+		return false;
+
+	/* Only form the member address once sk is known to be non-NULL. */
+	skcd = &sk->sk_cgrp_data;
+
+	if (ancestor)
+		return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
+			info->invert_path;
+
+	return (info->classid == sock_cgroup_classid(skcd)) ^
+		info->invert_classid;
+}
+
static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
{
struct xt_cgroup_info_v1 *info = par->matchinfo;
@@ -107,6 +157,14 @@ static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
cgroup_put(info->priv);
}
+/* destroy hook for revision 2: drop the cgroup reference stored in
+ * info->priv, if there is one.
+ */
+static void cgroup_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+	struct xt_cgroup_info_v2 *info = par->matchinfo;
+
+	if (!info->priv)
+		return;
+
+	cgroup_put(info->priv);
+}
+
static struct xt_match cgroup_mt_reg[] __read_mostly = {
{
.name = "cgroup",
@@ -134,6 +192,20 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = {
(1 << NF_INET_POST_ROUTING) |
(1 << NF_INET_LOCAL_IN),
},
+ {
+ .name = "cgroup",
+ .revision = 2,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check_v2,
+ .match = cgroup_mt_v2,
+ .matchsize = sizeof(struct xt_cgroup_info_v2),
+ .usersize = offsetof(struct xt_cgroup_info_v2, priv),
+ .destroy = cgroup_mt_destroy_v2,
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ },
};
static int __init cgroup_mt_init(void)
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 10d61a6eed71..fceae245eb03 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -11,11 +11,6 @@
#include <linux/netfilter/xt_quota.h>
#include <linux/module.h>
-struct xt_quota_priv {
- spinlock_t lock;
- uint64_t quota;
-};
-
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
MODULE_DESCRIPTION("Xtables: countdown quota match");
@@ -26,54 +21,48 @@ static bool
quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
struct xt_quota_info *q = (void *)par->matchinfo;
- struct xt_quota_priv *priv = q->master;
+ u64 current_count = atomic64_read(&q->counter);
bool ret = q->flags & XT_QUOTA_INVERT;
-
- spin_lock_bh(&priv->lock);
- if (priv->quota >= skb->len) {
- priv->quota -= skb->len;
- ret = !ret;
- } else {
- /* we do not allow even small packets from now on */
- priv->quota = 0;
- }
- spin_unlock_bh(&priv->lock);
-
- return ret;
+ u64 old_count, new_count;
+
+ do {
+ if (current_count == 1)
+ return ret;
+ if (current_count <= skb->len) {
+ atomic64_set(&q->counter, 1);
+ return ret;
+ }
+ old_count = current_count;
+ new_count = current_count - skb->len;
+ current_count = atomic64_cmpxchg(&q->counter, old_count,
+ new_count);
+ } while (current_count != old_count);
+ return !ret;
}
static int quota_mt_check(const struct xt_mtchk_param *par)
{
struct xt_quota_info *q = par->matchinfo;
+ BUILD_BUG_ON(sizeof(atomic64_t) != sizeof(__u64));
+
if (q->flags & ~XT_QUOTA_MASK)
return -EINVAL;
+ if (atomic64_read(&q->counter) > q->quota + 1)
+ return -ERANGE;
- q->master = kmalloc(sizeof(*q->master), GFP_KERNEL);
- if (q->master == NULL)
- return -ENOMEM;
-
- spin_lock_init(&q->master->lock);
- q->master->quota = q->quota;
+ if (atomic64_read(&q->counter) == 0)
+ atomic64_set(&q->counter, q->quota + 1);
return 0;
}
-static void quota_mt_destroy(const struct xt_mtdtor_param *par)
-{
- const struct xt_quota_info *q = par->matchinfo;
-
- kfree(q->master);
-}
-
static struct xt_match quota_mt_reg __read_mostly = {
.name = "quota",
.revision = 0,
.family = NFPROTO_UNSPEC,
.match = quota_mt,
.checkentry = quota_mt_check,
- .destroy = quota_mt_destroy,
.matchsize = sizeof(struct xt_quota_info),
- .usersize = offsetof(struct xt_quota_info, master),
.me = THIS_MODULE,
};
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 930d17fa906c..e613a9f89600 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -574,11 +574,6 @@ static int netlink_insert(struct sock *sk, u32 portid)
if (nlk_sk(sk)->bound)
goto err;
- err = -ENOMEM;
- if (BITS_PER_LONG > 32 &&
- unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
- goto err;
-
nlk_sk(sk)->portid = portid;
sock_hold(sk);
@@ -993,7 +988,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
struct netlink_sock *nlk = nlk_sk(sk);
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
int err = 0;
- long unsigned int groups = nladdr->nl_groups;
+ unsigned long groups = nladdr->nl_groups;
bool bound;
if (addr_len < sizeof(struct sockaddr_nl))
@@ -1011,9 +1006,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
return err;
}
- if (nlk->ngroups == 0)
- groups = 0;
- else if (nlk->ngroups < 8*sizeof(groups))
+ if (nlk->ngroups < BITS_PER_LONG)
groups &= (1UL << nlk->ngroups) - 1;
bound = nlk->bound;
@@ -1713,6 +1706,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
nlk->flags &= ~NETLINK_F_EXT_ACK;
err = 0;
break;
+ case NETLINK_DUMP_STRICT_CHK:
+ if (val)
+ nlk->flags |= NETLINK_F_STRICT_CHK;
+ else
+ nlk->flags &= ~NETLINK_F_STRICT_CHK;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -1806,6 +1806,15 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
err = 0;
break;
+ case NETLINK_DUMP_STRICT_CHK:
+ if (len < sizeof(int))
+ return -EINVAL;
+ len = sizeof(int);
+ val = nlk->flags & NETLINK_F_STRICT_CHK ? 1 : 0;
+ if (put_user(len, optlen) || put_user(val, optval))
+ return -EFAULT;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -2178,6 +2187,7 @@ EXPORT_SYMBOL(__nlmsg_put);
static int netlink_dump(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
+ struct netlink_ext_ack extack = {};
struct netlink_callback *cb;
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
@@ -2229,8 +2239,11 @@ static int netlink_dump(struct sock *sk)
skb_reserve(skb, skb_tailroom(skb) - alloc_size);
netlink_skb_set_owner_r(skb, sk);
- if (nlk->dump_done_errno > 0)
+ if (nlk->dump_done_errno > 0) {
+ cb->extack = &extack;
nlk->dump_done_errno = cb->dump(skb, cb);
+ cb->extack = NULL;
+ }
if (nlk->dump_done_errno > 0 ||
skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
@@ -2253,6 +2266,12 @@ static int netlink_dump(struct sock *sk)
memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
sizeof(nlk->dump_done_errno));
+ if (extack._msg && nlk->flags & NETLINK_F_EXT_ACK) {
+ nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
+ if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack._msg))
+ nlmsg_end(skb, nlh);
+ }
+
if (sk_filter(sk, skb))
kfree_skb(skb);
else
@@ -2279,9 +2298,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
struct netlink_dump_control *control)
{
+ struct netlink_sock *nlk, *nlk2;
struct netlink_callback *cb;
struct sock *sk;
- struct netlink_sock *nlk;
int ret;
refcount_inc(&skb->users);
@@ -2315,6 +2334,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->min_dump_alloc = control->min_dump_alloc;
cb->skb = skb;
+ nlk2 = nlk_sk(NETLINK_CB(skb).sk);
+ cb->strict_check = !!(nlk2->flags & NETLINK_F_STRICT_CHK);
+
if (control->start) {
ret = control->start(cb);
if (ret)
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 962de7b3c023..5f454c8de6a4 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -15,6 +15,7 @@
#define NETLINK_F_LISTEN_ALL_NSID 0x10
#define NETLINK_F_CAP_ACK 0x20
#define NETLINK_F_EXT_ACK 0x40
+#define NETLINK_F_STRICT_CHK 0x80
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index a66f102c6c01..4503937915ad 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -192,10 +192,8 @@ static void nci_uart_tty_close(struct tty_struct *tty)
if (!nu)
return;
- if (nu->tx_skb)
- kfree_skb(nu->tx_skb);
- if (nu->rx_skb)
- kfree_skb(nu->rx_skb);
+ kfree_skb(nu->tx_skb);
+ kfree_skb(nu->rx_skb);
skb_queue_purge(&nu->tx_q);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 35ae64cbef33..6bec37ab4472 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -933,6 +933,11 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
struct nf_conn *ct;
if (!cached) {
+ struct nf_hook_state state = {
+ .hook = NF_INET_PRE_ROUTING,
+ .pf = info->family,
+ .net = net,
+ };
struct nf_conn *tmpl = info->ct;
int err;
@@ -944,8 +949,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
nf_ct_set(skb, tmpl, IP_CT_NEW);
}
- err = nf_conntrack_in(net, info->family,
- NF_INET_PRE_ROUTING, skb);
+ err = nf_conntrack_in(skb, &state);
if (err != NF_ACCEPT)
return -ENOENT;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 0f5ce77460d4..6679e96ab1dc 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1182,14 +1182,14 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
ovs_header->dp_ifindex,
reply, info->snd_portid,
info->snd_seq, 0,
- OVS_FLOW_CMD_NEW,
+ OVS_FLOW_CMD_SET,
ufid_flags);
BUG_ON(error < 0);
}
} else {
/* Could not alloc without acts before locking. */
reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
- info, OVS_FLOW_CMD_NEW, false,
+ info, OVS_FLOW_CMD_SET, false,
ufid_flags);
if (IS_ERR(reply)) {
@@ -1265,7 +1265,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
}
reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
- OVS_FLOW_CMD_NEW, true, ufid_flags);
+ OVS_FLOW_CMD_GET, true, ufid_flags);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
goto unlock;
@@ -1389,7 +1389,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- OVS_FLOW_CMD_NEW, ufid_flags) < 0)
+ OVS_FLOW_CMD_GET, ufid_flags) < 0)
break;
cb->args[0] = bucket;
@@ -1730,7 +1730,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
ovs_dp_change(dp, info->attrs);
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
- info->snd_seq, 0, OVS_DP_CMD_NEW);
+ info->snd_seq, 0, OVS_DP_CMD_SET);
BUG_ON(err < 0);
ovs_unlock();
@@ -1761,7 +1761,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_free;
}
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
- info->snd_seq, 0, OVS_DP_CMD_NEW);
+ info->snd_seq, 0, OVS_DP_CMD_GET);
BUG_ON(err < 0);
ovs_unlock();
@@ -1785,7 +1785,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (i >= skip &&
ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- OVS_DP_CMD_NEW) < 0)
+ OVS_DP_CMD_GET) < 0)
break;
i++;
}
@@ -2101,7 +2101,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_NEW);
+ OVS_VPORT_CMD_SET);
BUG_ON(err < 0);
ovs_unlock();
@@ -2182,7 +2182,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock_free;
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_NEW);
+ OVS_VPORT_CMD_GET);
BUG_ON(err < 0);
rcu_read_unlock();
@@ -2218,7 +2218,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
- OVS_VPORT_CMD_NEW) < 0)
+ OVS_VPORT_CMD_GET) < 0)
goto out;
j++;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 56b8e7167790..35966da84769 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -254,21 +254,18 @@ static bool icmphdr_ok(struct sk_buff *skb)
static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
{
+ unsigned short frag_off;
+ unsigned int payload_ofs = 0;
unsigned int nh_ofs = skb_network_offset(skb);
unsigned int nh_len;
- int payload_ofs;
struct ipv6hdr *nh;
- uint8_t nexthdr;
- __be16 frag_off;
- int err;
+ int err, nexthdr, flags = 0;
err = check_header(skb, nh_ofs + sizeof(*nh));
if (unlikely(err))
return err;
nh = ipv6_hdr(skb);
- nexthdr = nh->nexthdr;
- payload_ofs = (u8 *)(nh + 1) - skb->data;
key->ip.proto = NEXTHDR_NONE;
key->ip.tos = ipv6_get_dsfield(nh);
@@ -277,10 +274,9 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
key->ipv6.addr.src = nh->saddr;
key->ipv6.addr.dst = nh->daddr;
- payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
-
- if (frag_off) {
- if (frag_off & htons(~0x7))
+ nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
+ if (flags & IP6_FH_F_FRAG) {
+ if (frag_off)
key->ip.frag = OVS_FRAG_TYPE_LATER;
else
key->ip.frag = OVS_FRAG_TYPE_FIRST;
@@ -288,11 +284,11 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
key->ip.frag = OVS_FRAG_TYPE_NONE;
}
- /* Delayed handling of error in ipv6_skip_exthdr() as it
- * always sets frag_off to a valid value which may be
+ /* Delayed handling of error in ipv6_find_hdr() as it
+ * always sets flags and frag_off to a valid value which may be
* used to set key->ip.frag above.
*/
- if (unlikely(payload_ofs < 0))
+ if (unlikely(nexthdr < 0))
return -EPROTO;
nh_len = payload_ofs - nh_ofs;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index bb95c43aae76..26f71cbf7527 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -43,7 +43,8 @@ static struct internal_dev *internal_dev_priv(struct net_device *netdev)
}
/* Called with rcu_read_lock_bh. */
-static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t
+internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
{
int len, err;
@@ -62,7 +63,7 @@ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
} else {
netdev->stats.tx_errors++;
}
- return 0;
+ return NETDEV_TX_OK;
}
static int internal_dev_open(struct net_device *netdev)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d6e94dc7e290..ec3095f13aae 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3808,6 +3808,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
return fanout_set_data(po, optval, optlen);
}
+ case PACKET_IGNORE_OUTGOING:
+ {
+ int val;
+
+ if (optlen != sizeof(val))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, sizeof(val)))
+ return -EFAULT;
+ if (val < 0 || val > 1)
+ return -EINVAL;
+
+ po->prot_hook.ignore_outgoing = !!val;
+ return 0;
+ }
case PACKET_TX_HAS_OFF:
{
unsigned int val;
@@ -3931,6 +3945,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
((u32)po->fanout->flags << 24)) :
0);
break;
+ case PACKET_IGNORE_OUTGOING:
+ val = po->prot_hook.ignore_outgoing;
+ break;
case PACKET_ROLLOVER_STATS:
if (!po->rollover)
return -EINVAL;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c4dcf654d8fe..6bfaf05b63b2 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -278,7 +278,7 @@ struct rds_incoming {
struct in6_addr i_saddr;
rds_rdma_cookie_t i_rdma_cookie;
- struct timeval i_rx_tstamp;
+ ktime_t i_rx_tstamp;
u64 i_rx_lat_trace[RDS_RX_MAX_TRACES];
};
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 504cd6bcc54c..727639dac8a7 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -43,18 +43,14 @@
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
struct in6_addr *saddr)
{
- int i;
-
refcount_set(&inc->i_refcount, 1);
INIT_LIST_HEAD(&inc->i_item);
inc->i_conn = conn;
inc->i_saddr = *saddr;
inc->i_rdma_cookie = 0;
- inc->i_rx_tstamp.tv_sec = 0;
- inc->i_rx_tstamp.tv_usec = 0;
+ inc->i_rx_tstamp = ktime_set(0, 0);
- for (i = 0; i < RDS_RX_MAX_TRACES; i++)
- inc->i_rx_lat_trace[i] = 0;
+ memset(inc->i_rx_lat_trace, 0, sizeof(inc->i_rx_lat_trace));
}
EXPORT_SYMBOL_GPL(rds_inc_init);
@@ -67,8 +63,7 @@ void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
inc->i_conn_path = cp;
inc->i_saddr = *saddr;
inc->i_rdma_cookie = 0;
- inc->i_rx_tstamp.tv_sec = 0;
- inc->i_rx_tstamp.tv_usec = 0;
+ inc->i_rx_tstamp = ktime_set(0, 0);
}
EXPORT_SYMBOL_GPL(rds_inc_path_init);
@@ -385,7 +380,7 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
be32_to_cpu(inc->i_hdr.h_len),
inc->i_hdr.h_dport);
if (sock_flag(sk, SOCK_RCVTSTAMP))
- do_gettimeofday(&inc->i_rx_tstamp);
+ inc->i_rx_tstamp = ktime_get_real();
rds_inc_addref(inc);
inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock();
list_add_tail(&inc->i_item, &rs->rs_recv_queue);
@@ -552,11 +547,11 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
goto out;
}
- if ((inc->i_rx_tstamp.tv_sec != 0) &&
+ if ((inc->i_rx_tstamp != 0) &&
sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) {
+ struct timeval tv = ktime_to_timeval(inc->i_rx_tstamp);
ret = put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
- sizeof(struct timeval),
- &inc->i_rx_tstamp);
+ sizeof(tv), &tv);
if (ret)
goto out;
}
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 1355f5ca8d22..abca57040f37 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -510,8 +510,8 @@ void rfkill_remove_epo_lock(void)
/**
* rfkill_is_epo_lock_active - returns true EPO is active
*
- * Returns 0 (false) if there is NOT an active EPO contidion,
- * and 1 (true) if there is an active EPO contition, which
+ * Returns 0 (false) if there is NOT an active EPO condition,
+ * and 1 (true) if there is an active EPO condition, which
* locks all radios in one of the BLOCKED states.
*
* Can be called in atomic context.
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index ac44d8afffb1..013dbcb052e5 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -97,7 +97,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
srx->transport_len > len)
return -EINVAL;
- if (srx->transport.family != rx->family)
+ if (srx->transport.family != rx->family &&
+ srx->transport.family == AF_INET && rx->family != AF_INET6)
return -EAFNOSUPPORT;
switch (srx->transport.family) {
@@ -385,6 +386,20 @@ u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
EXPORT_SYMBOL(rxrpc_kernel_check_life);
/**
+ * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
+ * @sock: The socket the call is on
+ * @call: The call to query
+ *
+ * Allow a kernel service to retrieve the epoch value from a service call to
+ * see if the client at the other end rebooted.
+ */
+u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call)
+{
+ return call->conn->proto.epoch;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_epoch);
+
+/**
* rxrpc_kernel_check_call - Check a call's state
* @sock: The socket the call is on
* @call: The call to check
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index a6e6cae82c30..8cee7644965c 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -1098,7 +1098,6 @@ void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace);
-void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_purge_queue(struct sk_buff_head *);
/*
@@ -1115,8 +1114,7 @@ static inline void rxrpc_sysctl_exit(void) {}
/*
* utils.c
*/
-int rxrpc_extract_addr_from_skb(struct rxrpc_local *, struct sockaddr_rxrpc *,
- struct sk_buff *);
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
static inline bool before(u32 seq1, u32 seq2)
{
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 652e314de38e..e0d8ca03169a 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -280,7 +280,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
peer = NULL;
if (!peer) {
peer = b->peer_backlog[peer_tail];
- if (rxrpc_extract_addr_from_skb(local, &peer->srx, skb) < 0)
+ if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0)
return NULL;
b->peer_backlog[peer_tail] = NULL;
smp_store_release(&b->peer_backlog_tail,
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 885dae829f4a..c332722820c2 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -86,11 +86,12 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
- if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
+ if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
goto not_found;
- /* We may have to handle mixing IPv4 and IPv6 */
- if (srx.transport.family != local->srx.transport.family) {
+ if (srx.transport.family != local->srx.transport.family &&
+ (srx.transport.family == AF_INET &&
+ local->srx.transport.family != AF_INET6)) {
pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
srx.transport.family,
local->srx.transport.family);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 570b49d2da42..9128aa0e40aa 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -262,7 +262,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
while (list) {
skb = list;
list = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
}
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 13bd8a4dfac7..927ead43df42 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -39,7 +39,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
_enter("");
- if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
+ if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
return;
msg.msg_name = &srx.transport;
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index e8fb8922bca8..0f0b499d1202 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -378,11 +378,13 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
if ((lose++ & 7) == 7) {
ret = 0;
lost = true;
- goto done;
}
}
- _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq);
+ trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
+ retrans, lost);
+ if (lost)
+ goto done;
/* send the packet with the don't fragment bit set if we currently
* think it's small enough */
@@ -415,8 +417,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
goto send_fragmentable;
done:
- trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
- retrans, lost);
if (ret >= 0) {
if (whdr.flags & RXRPC_REQUEST_ACK) {
call->peer->rtt_last_req = skb->tstamp;
@@ -561,7 +561,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
continue;
}
- if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) {
+ if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
msg.msg_namelen = srx.transport_len;
whdr.epoch = htonl(sp->hdr.epoch);
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 05b51bdbdd41..7feb611c7258 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -47,6 +47,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
*/
switch (srx->transport.family) {
case AF_INET:
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.family = AF_INET;
srx->transport.sin.sin_port = serr->port;
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP:
@@ -70,20 +72,20 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
- srx->transport.sin6.sin6_port = serr->port;
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP6:
_net("Rx ICMP6");
+ srx->transport.sin6.sin6_port = serr->port;
memcpy(&srx->transport.sin6.sin6_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in6_addr));
break;
case SO_EE_ORIGIN_ICMP:
_net("Rx ICMP on v6 sock");
- srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
- srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
- srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
- memcpy(srx->transport.sin6.sin6_addr.s6_addr + 12,
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.family = AF_INET;
+ srx->transport.sin.sin_port = serr->port;
+ memcpy(&srx->transport.sin.sin_addr,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in_addr));
break;
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 816b19a78809..eaf19ebaa964 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -715,3 +715,46 @@ call_complete:
goto out;
}
EXPORT_SYMBOL(rxrpc_kernel_recv_data);
+
+/**
+ * rxrpc_kernel_get_reply_time - Get timestamp on first reply packet
+ * @sock: The socket that the call exists on
+ * @call: The call to query
+ * @_ts: Where to put the timestamp
+ *
+ * Retrieve the timestamp from the first DATA packet of the reply if it is
+ * in the ring. Returns true if successful, false if not.
+ */
+bool rxrpc_kernel_get_reply_time(struct socket *sock, struct rxrpc_call *call,
+ ktime_t *_ts)
+{
+ struct sk_buff *skb;
+ rxrpc_seq_t hard_ack, top, seq;
+ bool success = false;
+
+ mutex_lock(&call->user_mutex);
+
+ if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_RECV_REPLY)
+ goto out;
+
+ hard_ack = call->rx_hard_ack;
+ if (hard_ack != 0)
+ goto out;
+
+ seq = hard_ack + 1;
+ top = smp_load_acquire(&call->rx_top);
+ if (after(seq, top))
+ goto out;
+
+ skb = call->rxtx_buffer[seq & RXRPC_RXTX_BUFF_MASK];
+ if (!skb)
+ goto out;
+
+ *_ts = skb_get_ktime(skb);
+ success = true;
+
+out:
+ mutex_unlock(&call->user_mutex);
+ return success;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_reply_time);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index b8985d01876a..913dca65cc65 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -69,21 +69,6 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
}
/*
- * Note the injected loss of a socket buffer.
- */
-void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
-{
- const void *here = __builtin_return_address(0);
- if (skb) {
- int n;
- CHECK_SLAB_OKAY(&skb->users);
- n = atomic_dec_return(select_skb_count(op));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
- kfree_skb(skb);
- }
-}
-
-/*
* Clear a queue of socket buffers.
*/
void rxrpc_purge_queue(struct sk_buff_head *list)
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index e801171fa351..ff7af71c4b49 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -17,28 +17,17 @@
/*
* Fill out a peer address from a socket buffer containing a packet.
*/
-int rxrpc_extract_addr_from_skb(struct rxrpc_local *local,
- struct sockaddr_rxrpc *srx,
- struct sk_buff *skb)
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
{
memset(srx, 0, sizeof(*srx));
switch (ntohs(skb->protocol)) {
case ETH_P_IP:
- if (local->srx.transport.family == AF_INET6) {
- srx->transport_type = SOCK_DGRAM;
- srx->transport_len = sizeof(srx->transport.sin6);
- srx->transport.sin6.sin6_family = AF_INET6;
- srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
- srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
- srx->transport.sin6.sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
- } else {
- srx->transport_type = SOCK_DGRAM;
- srx->transport_len = sizeof(srx->transport.sin);
- srx->transport.sin.sin_family = AF_INET;
- srx->transport.sin.sin_port = udp_hdr(skb)->source;
- srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
- }
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.sin.sin_family = AF_INET;
+ srx->transport.sin.sin_port = udp_hdr(skb)->source;
+ srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
return 0;
#ifdef CONFIG_AF_RXRPC_IPV6
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e95741388311..1b9afdee5ba9 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -194,6 +194,17 @@ config NET_SCH_ETF
To compile this code as a module, choose M here: the
module will be called sch_etf.
+config NET_SCH_TAPRIO
+ tristate "Time Aware Priority (taprio) Scheduler"
+ help
+ Say Y here if you want to use the Time Aware Priority (taprio) packet
+ scheduling algorithm.
+
+ See the top of <file:net/sched/sch_taprio.c> for more details.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_taprio.
+
config NET_SCH_GRED
tristate "Generic Random Early Detection (GRED)"
---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f0403f49edcb..8a40431d7b5c 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -57,6 +57,7 @@ obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o
obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o
obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o
obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o
+obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e12f8ef7baa4..9c1b0729aebf 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -81,6 +81,7 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
static void free_tcf(struct tc_action *p)
{
free_percpu(p->cpu_bstats);
+ free_percpu(p->cpu_bstats_hw);
free_percpu(p->cpu_qstats);
tcf_set_action_cookie(&p->act_cookie, NULL);
@@ -103,11 +104,11 @@ static int __tcf_action_put(struct tc_action *p, bool bind)
{
struct tcf_idrinfo *idrinfo = p->idrinfo;
- if (refcount_dec_and_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
+ if (refcount_dec_and_mutex_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
if (bind)
atomic_dec(&p->tcfa_bindcnt);
idr_remove(&idrinfo->action_idr, p->tcfa_index);
- spin_unlock(&idrinfo->lock);
+ mutex_unlock(&idrinfo->lock);
tcf_action_cleanup(p);
return 1;
@@ -199,7 +200,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
struct tc_action *p;
unsigned long id = 1;
- spin_lock(&idrinfo->lock);
+ mutex_lock(&idrinfo->lock);
s_i = cb->args[0];
@@ -234,7 +235,7 @@ done:
if (index >= 0)
cb->args[0] = index + 1;
- spin_unlock(&idrinfo->lock);
+ mutex_unlock(&idrinfo->lock);
if (n_i) {
if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
cb->args[1] = n_i;
@@ -246,6 +247,20 @@ nla_put_failure:
goto done;
}
+static int tcf_idr_release_unsafe(struct tc_action *p)
+{
+ if (atomic_read(&p->tcfa_bindcnt) > 0)
+ return -EPERM;
+
+ if (refcount_dec_and_test(&p->tcfa_refcnt)) {
+ idr_remove(&p->idrinfo->action_idr, p->tcfa_index);
+ tcf_action_cleanup(p);
+ return ACT_P_DELETED;
+ }
+
+ return 0;
+}
+
static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
const struct tc_action_ops *ops)
{
@@ -262,15 +277,19 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
if (nla_put_string(skb, TCA_KIND, ops->kind))
goto nla_put_failure;
+ mutex_lock(&idrinfo->lock);
idr_for_each_entry_ul(idr, p, id) {
- ret = __tcf_idr_release(p, false, true);
+ ret = tcf_idr_release_unsafe(p);
if (ret == ACT_P_DELETED) {
module_put(ops->owner);
n_i++;
} else if (ret < 0) {
+ mutex_unlock(&idrinfo->lock);
goto nla_put_failure;
}
}
+ mutex_unlock(&idrinfo->lock);
+
if (nla_put_u32(skb, TCA_FCNT, n_i))
goto nla_put_failure;
nla_nest_end(skb, nest);
@@ -305,13 +324,13 @@ int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
struct tcf_idrinfo *idrinfo = tn->idrinfo;
struct tc_action *p;
- spin_lock(&idrinfo->lock);
+ mutex_lock(&idrinfo->lock);
p = idr_find(&idrinfo->action_idr, index);
if (IS_ERR(p))
p = NULL;
else if (p)
refcount_inc(&p->tcfa_refcnt);
- spin_unlock(&idrinfo->lock);
+ mutex_unlock(&idrinfo->lock);
if (p) {
*a = p;
@@ -326,10 +345,10 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
struct tc_action *p;
int ret = 0;
- spin_lock(&idrinfo->lock);
+ mutex_lock(&idrinfo->lock);
p = idr_find(&idrinfo->action_idr, index);
if (!p) {
- spin_unlock(&idrinfo->lock);
+ mutex_unlock(&idrinfo->lock);
return -ENOENT;
}
@@ -339,7 +358,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
WARN_ON(p != idr_remove(&idrinfo->action_idr,
p->tcfa_index));
- spin_unlock(&idrinfo->lock);
+ mutex_unlock(&idrinfo->lock);
tcf_action_cleanup(p);
module_put(owner);
@@ -350,7 +369,7 @@ static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
ret = -EPERM;
}
- spin_unlock(&idrinfo->lock);
+ mutex_unlock(&idrinfo->lock);
return ret;
}
@@ -372,9 +391,12 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
if (!p->cpu_bstats)
goto err1;
+ p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ if (!p->cpu_bstats_hw)
+ goto err2;
p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
if (!p->cpu_qstats)
- goto err2;
+ goto err3;
}
spin_lock_init(&p->tcfa_lock);
p->tcfa_index = index;
@@ -386,15 +408,17 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
&p->tcfa_rate_est,
&p->tcfa_lock, NULL, est);
if (err)
- goto err3;
+ goto err4;
}
p->idrinfo = idrinfo;
p->ops = ops;
*a = p;
return 0;
-err3:
+err4:
free_percpu(p->cpu_qstats);
+err3:
+ free_percpu(p->cpu_bstats_hw);
err2:
free_percpu(p->cpu_bstats);
err1:
@@ -407,10 +431,10 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
- spin_lock(&idrinfo->lock);
+ mutex_lock(&idrinfo->lock);
/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
- spin_unlock(&idrinfo->lock);
+ mutex_unlock(&idrinfo->lock);
}
EXPORT_SYMBOL(tcf_idr_insert);
@@ -420,10 +444,10 @@ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
{
struct tcf_idrinfo *idrinfo = tn->idrinfo;
- spin_lock(&idrinfo->lock);
+ mutex_lock(&idrinfo->lock);
/* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index)));
- spin_unlock(&idrinfo->lock);
+ mutex_unlock(&idrinfo->lock);
}
EXPORT_SYMBOL(tcf_idr_cleanup);
@@ -441,14 +465,14 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
int ret;
again:
- spin_lock(&idrinfo->lock);
+ mutex_lock(&idrinfo->lock);
if (*index) {
p = idr_find(&idrinfo->action_idr, *index);
if (IS_ERR(p)) {
/* This means that another process allocated
* index but did not assign the pointer yet.
*/
- spin_unlock(&idrinfo->lock);
+ mutex_unlock(&idrinfo->lock);
goto again;
}
@@ -461,7 +485,7 @@ again:
} else {
*a = NULL;
ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
- *index, GFP_ATOMIC);
+ *index, GFP_KERNEL);
if (!ret)
idr_replace(&idrinfo->action_idr,
ERR_PTR(-EBUSY), *index);
@@ -470,12 +494,12 @@ again:
*index = 1;
*a = NULL;
ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
- UINT_MAX, GFP_ATOMIC);
+ UINT_MAX, GFP_KERNEL);
if (!ret)
idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY),
*index);
}
- spin_unlock(&idrinfo->lock);
+ mutex_unlock(&idrinfo->lock);
return ret;
}
EXPORT_SYMBOL(tcf_idr_check_alloc);
@@ -979,6 +1003,8 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
goto errout;
if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
+ gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
+ &p->tcfa_bstats_hw) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
gnet_stats_copy_queue(&d, p->cpu_qstats,
&p->tcfa_qstats,
@@ -1073,12 +1099,14 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
err = -EINVAL;
ops = tc_lookup_action(tb[TCA_ACT_KIND]);
if (!ops) { /* could happen in batch of actions */
- NL_SET_ERR_MSG(extack, "Specified TC action not found");
+ NL_SET_ERR_MSG(extack, "Specified TC action kind not found");
goto err_out;
}
err = -ENOENT;
- if (ops->lookup(net, &a, index, extack) == 0)
+ if (ops->lookup(net, &a, index) == 0) {
+ NL_SET_ERR_MSG(extack, "TC action with specified index not found");
goto err_mod;
+ }
module_put(ops->owner);
return a;
@@ -1424,7 +1452,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
u32 act_count = 0;
ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
- tcaa_policy, NULL);
+ tcaa_policy, cb->extack);
if (ret < 0)
return ret;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 0c68bc9cf0b4..c7633843e223 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -387,8 +387,7 @@ static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 6f0f273f1139..8475913f2070 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -143,8 +143,10 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
return -EEXIST;
}
/* replacing action and zone */
+ spin_lock_bh(&ci->tcf_lock);
ci->tcf_action = parm->action;
ci->zone = parm->zone;
+ spin_unlock_bh(&ci->tcf_lock);
ret = 0;
}
@@ -156,16 +158,16 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_connmark_info *ci = to_connmark(a);
-
struct tc_connmark opt = {
.index = ci->tcf_index,
.refcnt = refcount_read(&ci->tcf_refcnt) - ref,
.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
- .action = ci->tcf_action,
- .zone = ci->zone,
};
struct tcf_t t;
+ spin_lock_bh(&ci->tcf_lock);
+ opt.action = ci->tcf_action;
+ opt.zone = ci->zone;
if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -173,9 +175,12 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t,
TCA_CONNMARK_PAD))
goto nla_put_failure;
+ spin_unlock_bh(&ci->tcf_lock);
return skb->len;
+
nla_put_failure:
+ spin_unlock_bh(&ci->tcf_lock);
nlmsg_trim(skb, b);
return -1;
}
@@ -190,8 +195,7 @@ static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b8a67ae3105a..3dc25b7806d7 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -646,8 +646,7 @@ static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index cd1d9bd32ef9..c89a7fa43d1b 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -157,7 +157,7 @@ static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
}
static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse)
+ u64 lastuse, bool hw)
{
struct tcf_gact *gact = to_gact(a);
int action = READ_ONCE(gact->tcf_action);
@@ -168,6 +168,10 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
if (action == TC_ACT_SHOT)
this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
+ if (hw)
+ _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats_hw),
+ bytes, packets);
+
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
@@ -222,8 +226,7 @@ static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 06a3d4801878..30b63fa23ee2 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -855,8 +855,7 @@ static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 8525de811616..8af6c11d2482 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -329,8 +329,7 @@ static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
@@ -379,8 +378,7 @@ static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 8bf66d0a6800..1dae5f2b358f 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -283,12 +283,15 @@ out:
}
static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
- u64 lastuse)
+ u64 lastuse, bool hw)
{
struct tcf_mirred *m = to_mirred(a);
struct tcf_t *tm = &m->tcf_tm;
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+ if (hw)
+ _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+ bytes, packets);
tm->lastuse = max_t(u64, tm->lastuse, lastuse);
}
@@ -338,8 +341,7 @@ static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 4313aa102440..c5c1e23add77 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -256,28 +256,31 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
unsigned char *b = skb_tail_pointer(skb);
struct tcf_nat *p = to_tcf_nat(a);
struct tc_nat opt = {
- .old_addr = p->old_addr,
- .new_addr = p->new_addr,
- .mask = p->mask,
- .flags = p->flags,
-
.index = p->tcf_index,
- .action = p->tcf_action,
.refcnt = refcount_read(&p->tcf_refcnt) - ref,
.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
};
struct tcf_t t;
+ spin_lock_bh(&p->tcf_lock);
+ opt.old_addr = p->old_addr;
+ opt.new_addr = p->new_addr;
+ opt.mask = p->mask;
+ opt.flags = p->flags;
+ opt.action = p->tcf_action;
+
if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD))
goto nla_put_failure;
+ spin_unlock_bh(&p->tcf_lock);
return skb->len;
nla_put_failure:
+ spin_unlock_bh(&p->tcf_lock);
nlmsg_trim(skb, b);
return -1;
}
@@ -292,8 +295,7 @@ static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index ad99a99f11f6..da3dd0f68cc2 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -460,8 +460,7 @@ static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 5d8bfa878477..92649d2667ed 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,8 +22,7 @@
#include <net/act_api.h>
#include <net/netlink.h>
-struct tcf_police {
- struct tc_action common;
+struct tcf_police_params {
int tcfp_result;
u32 tcfp_ewma_rate;
s64 tcfp_burst;
@@ -36,6 +35,12 @@ struct tcf_police {
bool rate_present;
struct psched_ratecfg peak;
bool peak_present;
+ struct rcu_head rcu;
+};
+
+struct tcf_police {
+ struct tc_action common;
+ struct tcf_police_params __rcu *params;
};
#define to_police(pc) ((struct tcf_police *)pc)
@@ -84,6 +89,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
struct tcf_police *police;
struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
struct tc_action_net *tn = net_generic(net, police_net_id);
+ struct tcf_police_params *new;
bool exists = false;
int size;
@@ -110,7 +116,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
if (!exists) {
ret = tcf_idr_create(tn, parm->index, NULL, a,
- &act_police_ops, bind, false);
+ &act_police_ops, bind, true);
if (ret) {
tcf_idr_cleanup(tn, parm->index);
return ret;
@@ -137,7 +143,8 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
if (est) {
- err = gen_replace_estimator(&police->tcf_bstats, NULL,
+ err = gen_replace_estimator(&police->tcf_bstats,
+ police->common.cpu_bstats,
&police->tcf_rate_est,
&police->tcf_lock,
NULL, est);
@@ -150,50 +157,60 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
goto failure;
}
- spin_lock_bh(&police->tcf_lock);
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (unlikely(!new)) {
+ err = -ENOMEM;
+ goto failure;
+ }
+
/* No failure allowed after this point */
- police->tcfp_mtu = parm->mtu;
- if (police->tcfp_mtu == 0) {
- police->tcfp_mtu = ~0;
+ new->tcfp_mtu = parm->mtu;
+ if (!new->tcfp_mtu) {
+ new->tcfp_mtu = ~0;
if (R_tab)
- police->tcfp_mtu = 255 << R_tab->rate.cell_log;
+ new->tcfp_mtu = 255 << R_tab->rate.cell_log;
}
if (R_tab) {
- police->rate_present = true;
- psched_ratecfg_precompute(&police->rate, &R_tab->rate, 0);
+ new->rate_present = true;
+ psched_ratecfg_precompute(&new->rate, &R_tab->rate, 0);
qdisc_put_rtab(R_tab);
} else {
- police->rate_present = false;
+ new->rate_present = false;
}
if (P_tab) {
- police->peak_present = true;
- psched_ratecfg_precompute(&police->peak, &P_tab->rate, 0);
+ new->peak_present = true;
+ psched_ratecfg_precompute(&new->peak, &P_tab->rate, 0);
qdisc_put_rtab(P_tab);
} else {
- police->peak_present = false;
+ new->peak_present = false;
}
if (tb[TCA_POLICE_RESULT])
- police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
- police->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
- police->tcfp_toks = police->tcfp_burst;
- if (police->peak_present) {
- police->tcfp_mtu_ptoks = (s64) psched_l2t_ns(&police->peak,
- police->tcfp_mtu);
- police->tcfp_ptoks = police->tcfp_mtu_ptoks;
+ new->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
+ new->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
+ new->tcfp_toks = new->tcfp_burst;
+ if (new->peak_present) {
+ new->tcfp_mtu_ptoks = (s64)psched_l2t_ns(&new->peak,
+ new->tcfp_mtu);
+ new->tcfp_ptoks = new->tcfp_mtu_ptoks;
}
- police->tcf_action = parm->action;
if (tb[TCA_POLICE_AVRATE])
- police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
+ new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
+ spin_lock_bh(&police->tcf_lock);
+ new->tcfp_t_c = ktime_get_ns();
+ police->tcf_action = parm->action;
+ rcu_swap_protected(police->params,
+ new,
+ lockdep_is_held(&police->tcf_lock));
spin_unlock_bh(&police->tcf_lock);
- if (ret != ACT_P_CREATED)
- return ret;
- police->tcfp_t_c = ktime_get_ns();
- tcf_idr_insert(tn, *a);
+ if (new)
+ kfree_rcu(new, rcu);
+ if (ret == ACT_P_CREATED)
+ tcf_idr_insert(tn, *a);
return ret;
failure:
@@ -207,64 +224,69 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_police *police = to_police(a);
- s64 now;
- s64 toks;
- s64 ptoks = 0;
-
- spin_lock(&police->tcf_lock);
+ struct tcf_police_params *p;
+ s64 now, toks, ptoks = 0;
+ int ret;
- bstats_update(&police->tcf_bstats, skb);
tcf_lastuse_update(&police->tcf_tm);
+ bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb);
- if (police->tcfp_ewma_rate) {
+ ret = READ_ONCE(police->tcf_action);
+ p = rcu_dereference_bh(police->params);
+
+ if (p->tcfp_ewma_rate) {
struct gnet_stats_rate_est64 sample;
if (!gen_estimator_read(&police->tcf_rate_est, &sample) ||
- sample.bps >= police->tcfp_ewma_rate) {
- police->tcf_qstats.overlimits++;
- if (police->tcf_action == TC_ACT_SHOT)
- police->tcf_qstats.drops++;
- spin_unlock(&police->tcf_lock);
- return police->tcf_action;
- }
+ sample.bps >= p->tcfp_ewma_rate)
+ goto inc_overlimits;
}
- if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
- if (!police->rate_present) {
- spin_unlock(&police->tcf_lock);
- return police->tcfp_result;
+ if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
+ if (!p->rate_present) {
+ ret = p->tcfp_result;
+ goto end;
}
now = ktime_get_ns();
- toks = min_t(s64, now - police->tcfp_t_c,
- police->tcfp_burst);
- if (police->peak_present) {
- ptoks = toks + police->tcfp_ptoks;
- if (ptoks > police->tcfp_mtu_ptoks)
- ptoks = police->tcfp_mtu_ptoks;
- ptoks -= (s64) psched_l2t_ns(&police->peak,
- qdisc_pkt_len(skb));
+ toks = min_t(s64, now - p->tcfp_t_c, p->tcfp_burst);
+ if (p->peak_present) {
+ ptoks = toks + p->tcfp_ptoks;
+ if (ptoks > p->tcfp_mtu_ptoks)
+ ptoks = p->tcfp_mtu_ptoks;
+ ptoks -= (s64)psched_l2t_ns(&p->peak,
+ qdisc_pkt_len(skb));
}
- toks += police->tcfp_toks;
- if (toks > police->tcfp_burst)
- toks = police->tcfp_burst;
- toks -= (s64) psched_l2t_ns(&police->rate, qdisc_pkt_len(skb));
+ toks += p->tcfp_toks;
+ if (toks > p->tcfp_burst)
+ toks = p->tcfp_burst;
+ toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
if ((toks|ptoks) >= 0) {
- police->tcfp_t_c = now;
- police->tcfp_toks = toks;
- police->tcfp_ptoks = ptoks;
- if (police->tcfp_result == TC_ACT_SHOT)
- police->tcf_qstats.drops++;
- spin_unlock(&police->tcf_lock);
- return police->tcfp_result;
+ p->tcfp_t_c = now;
+ p->tcfp_toks = toks;
+ p->tcfp_ptoks = ptoks;
+ ret = p->tcfp_result;
+ goto inc_drops;
}
}
- police->tcf_qstats.overlimits++;
- if (police->tcf_action == TC_ACT_SHOT)
- police->tcf_qstats.drops++;
- spin_unlock(&police->tcf_lock);
- return police->tcf_action;
+inc_overlimits:
+ qstats_overlimit_inc(this_cpu_ptr(police->common.cpu_qstats));
+inc_drops:
+ if (ret == TC_ACT_SHOT)
+ qstats_drop_inc(this_cpu_ptr(police->common.cpu_qstats));
+end:
+ return ret;
+}
+
+static void tcf_police_cleanup(struct tc_action *a)
+{
+ struct tcf_police *police = to_police(a);
+ struct tcf_police_params *p;
+
+ p = rcu_dereference_protected(police->params, 1);
+ if (p)
+ kfree_rcu(p, rcu);
}
static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
@@ -272,6 +294,7 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_police *police = to_police(a);
+ struct tcf_police_params *p;
struct tc_police opt = {
.index = police->tcf_index,
.refcnt = refcount_read(&police->tcf_refcnt) - ref,
@@ -281,19 +304,21 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
spin_lock_bh(&police->tcf_lock);
opt.action = police->tcf_action;
- opt.mtu = police->tcfp_mtu;
- opt.burst = PSCHED_NS2TICKS(police->tcfp_burst);
- if (police->rate_present)
- psched_ratecfg_getrate(&opt.rate, &police->rate);
- if (police->peak_present)
- psched_ratecfg_getrate(&opt.peakrate, &police->peak);
+ p = rcu_dereference_protected(police->params,
+ lockdep_is_held(&police->tcf_lock));
+ opt.mtu = p->tcfp_mtu;
+ opt.burst = PSCHED_NS2TICKS(p->tcfp_burst);
+ if (p->rate_present)
+ psched_ratecfg_getrate(&opt.rate, &p->rate);
+ if (p->peak_present)
+ psched_ratecfg_getrate(&opt.peakrate, &p->peak);
if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
goto nla_put_failure;
- if (police->tcfp_result &&
- nla_put_u32(skb, TCA_POLICE_RESULT, police->tcfp_result))
+ if (p->tcfp_result &&
+ nla_put_u32(skb, TCA_POLICE_RESULT, p->tcfp_result))
goto nla_put_failure;
- if (police->tcfp_ewma_rate &&
- nla_put_u32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate))
+ if (p->tcfp_ewma_rate &&
+ nla_put_u32(skb, TCA_POLICE_AVRATE, p->tcfp_ewma_rate))
goto nla_put_failure;
t.install = jiffies_to_clock_t(jiffies - police->tcf_tm.install);
@@ -312,8 +337,7 @@ nla_put_failure:
return -1;
}
-static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
@@ -333,6 +357,7 @@ static struct tc_action_ops act_police_ops = {
.init = tcf_police_init,
.walk = tcf_police_walker,
.lookup = tcf_police_search,
+ .cleanup = tcf_police_cleanup,
.size = sizeof(struct tcf_police),
};
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 6b67aa13d2dd..1a0c682fd734 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -224,8 +224,7 @@ static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 52400d49f81f..902957beceb3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -188,8 +188,7 @@ static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 73e44ce2a883..64dba3708fce 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -99,7 +99,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- struct tcf_skbedit_params *params_old, *params_new;
+ struct tcf_skbedit_params *params_new;
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
struct tc_skbedit *parm;
struct tcf_skbedit *d;
@@ -187,8 +187,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
}
}
- ASSERT_RTNL();
-
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
if (ret == ACT_P_CREATED)
@@ -210,11 +208,13 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
if (flags & SKBEDIT_F_MASK)
params_new->mask = *mask;
+ spin_lock_bh(&d->tcf_lock);
d->tcf_action = parm->action;
- params_old = rtnl_dereference(d->params);
- rcu_assign_pointer(d->params, params_new);
- if (params_old)
- kfree_rcu(params_old, rcu);
+ rcu_swap_protected(d->params, params_new,
+ lockdep_is_held(&d->tcf_lock));
+ spin_unlock_bh(&d->tcf_lock);
+ if (params_new)
+ kfree_rcu(params_new, rcu);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
@@ -231,12 +231,14 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
.index = d->tcf_index,
.refcnt = refcount_read(&d->tcf_refcnt) - ref,
.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
- .action = d->tcf_action,
};
u64 pure_flags = 0;
struct tcf_t t;
- params = rtnl_dereference(d->params);
+ spin_lock_bh(&d->tcf_lock);
+ params = rcu_dereference_protected(d->params,
+ lockdep_is_held(&d->tcf_lock));
+ opt.action = d->tcf_action;
if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -264,9 +266,12 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
tcf_tm_dump(&t, &d->tcf_tm);
if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
goto nla_put_failure;
+ spin_unlock_bh(&d->tcf_lock);
+
return skb->len;
nla_put_failure:
+ spin_unlock_bh(&d->tcf_lock);
nlmsg_trim(skb, b);
return -1;
}
@@ -291,8 +296,7 @@ static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 588077fafd6c..59710a183bd3 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -251,8 +251,7 @@ static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 681f6f04e7da..4cca8f274662 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -548,8 +548,7 @@ static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 033d273afe50..ba677d54a7af 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -288,8 +288,7 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
-static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
- struct netlink_ext_ack *extack)
+static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0a75cb2e5e7b..43c8559aca56 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -240,8 +240,8 @@ static void tcf_chain_destroy(struct tcf_chain *chain)
if (!chain->index)
block->chain0.chain = NULL;
kfree(chain);
- if (list_empty(&block->chain_list) && block->refcnt == 0)
- kfree(block);
+ if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt))
+ kfree_rcu(block, rcu);
}
static void tcf_chain_hold(struct tcf_chain *chain)
@@ -473,6 +473,7 @@ tcf_chain0_head_change_cb_del(struct tcf_block *block,
}
struct tcf_net {
+ spinlock_t idr_lock; /* Protects idr */
struct idr idr;
};
@@ -482,16 +483,25 @@ static int tcf_block_insert(struct tcf_block *block, struct net *net,
struct netlink_ext_ack *extack)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
+ int err;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&tn->idr_lock);
+ err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
+ GFP_NOWAIT);
+ spin_unlock(&tn->idr_lock);
+ idr_preload_end();
- return idr_alloc_u32(&tn->idr, block, &block->index, block->index,
- GFP_KERNEL);
+ return err;
}
static void tcf_block_remove(struct tcf_block *block, struct net *net)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
+ spin_lock(&tn->idr_lock);
idr_remove(&tn->idr, block->index);
+ spin_unlock(&tn->idr_lock);
}
static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
@@ -510,7 +520,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
INIT_LIST_HEAD(&block->owner_list);
INIT_LIST_HEAD(&block->chain0.filter_chain_list);
- block->refcnt = 1;
+ refcount_set(&block->refcnt, 1);
block->net = net;
block->index = block_index;
@@ -527,6 +537,78 @@ static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
return idr_find(&tn->idr, block_index);
}
+static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
+{
+ struct tcf_block *block;
+
+ rcu_read_lock();
+ block = tcf_block_lookup(net, block_index);
+ if (block && !refcount_inc_not_zero(&block->refcnt))
+ block = NULL;
+ rcu_read_unlock();
+
+ return block;
+}
+
+static void tcf_block_flush_all_chains(struct tcf_block *block)
+{
+ struct tcf_chain *chain;
+
+ /* Hold a refcnt for all chains, so that they don't disappear
+ * while we are iterating.
+ */
+ list_for_each_entry(chain, &block->chain_list, list)
+ tcf_chain_hold(chain);
+
+ list_for_each_entry(chain, &block->chain_list, list)
+ tcf_chain_flush(chain);
+}
+
+static void tcf_block_put_all_chains(struct tcf_block *block)
+{
+ struct tcf_chain *chain, *tmp;
+
+ /* At this point, all the chains should have refcnt >= 1. */
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
+ tcf_chain_put_explicitly_created(chain);
+ tcf_chain_put(chain);
+ }
+}
+
+static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
+{
+ if (refcount_dec_and_test(&block->refcnt)) {
+ /* Flushing/putting all chains will cause the block to be
+ * deallocated when last chain is freed. However, if chain_list
+ * is empty, block has to be manually deallocated. After block
+ * reference counter reached 0, it is no longer possible to
+ * increment it or add new chains to block.
+ */
+ bool free_block = list_empty(&block->chain_list);
+
+ if (tcf_block_shared(block))
+ tcf_block_remove(block, block->net);
+ if (!free_block)
+ tcf_block_flush_all_chains(block);
+
+ if (q)
+ tcf_block_offload_unbind(block, q, ei);
+
+ if (free_block)
+ kfree_rcu(block, rcu);
+ else
+ tcf_block_put_all_chains(block);
+ } else if (q) {
+ tcf_block_offload_unbind(block, q, ei);
+ }
+}
+
+static void tcf_block_refcnt_put(struct tcf_block *block)
+{
+ __tcf_block_put(block, NULL, NULL);
+}
+
/* Find tcf block.
* Set q, parent, cl when appropriate.
*/
@@ -537,9 +619,10 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
struct netlink_ext_ack *extack)
{
struct tcf_block *block;
+ int err = 0;
if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
- block = tcf_block_lookup(net, block_index);
+ block = tcf_block_refcnt_get(net, block_index);
if (!block) {
NL_SET_ERR_MSG(extack, "Block of given index was not found");
return ERR_PTR(-EINVAL);
@@ -548,55 +631,106 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
const struct Qdisc_class_ops *cops;
struct net_device *dev;
+ rcu_read_lock();
+
/* Find link */
- dev = __dev_get_by_index(net, ifindex);
- if (!dev)
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (!dev) {
+ rcu_read_unlock();
return ERR_PTR(-ENODEV);
+ }
/* Find qdisc */
if (!*parent) {
*q = dev->qdisc;
*parent = (*q)->handle;
} else {
- *q = qdisc_lookup(dev, TC_H_MAJ(*parent));
+ *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
if (!*q) {
NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
- return ERR_PTR(-EINVAL);
+ err = -EINVAL;
+ goto errout_rcu;
}
}
+ *q = qdisc_refcount_inc_nz(*q);
+ if (!*q) {
+ NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+ err = -EINVAL;
+ goto errout_rcu;
+ }
+
/* Is it classful? */
cops = (*q)->ops->cl_ops;
if (!cops) {
NL_SET_ERR_MSG(extack, "Qdisc not classful");
- return ERR_PTR(-EINVAL);
+ err = -EINVAL;
+ goto errout_rcu;
}
if (!cops->tcf_block) {
NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
- return ERR_PTR(-EOPNOTSUPP);
+ err = -EOPNOTSUPP;
+ goto errout_rcu;
}
+ /* At this point we know that qdisc is not noop_qdisc,
+ * which means that qdisc holds a reference to net_device
+ * and we hold a reference to qdisc, so it is safe to release
+ * rcu read lock.
+ */
+ rcu_read_unlock();
+
/* Do we search for filter, attached to class? */
if (TC_H_MIN(*parent)) {
*cl = cops->find(*q, *parent);
if (*cl == 0) {
NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
- return ERR_PTR(-ENOENT);
+ err = -ENOENT;
+ goto errout_qdisc;
}
}
/* And the last stroke */
block = cops->tcf_block(*q, *cl, extack);
- if (!block)
- return ERR_PTR(-EINVAL);
+ if (!block) {
+ err = -EINVAL;
+ goto errout_qdisc;
+ }
if (tcf_block_shared(block)) {
NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
- return ERR_PTR(-EOPNOTSUPP);
+ err = -EOPNOTSUPP;
+ goto errout_qdisc;
}
+
+ /* Always take reference to block in order to support execution
+ * of rules update path of cls API without rtnl lock. Caller
+ * must release block when it is finished using it. 'if' block
+ * of this conditional obtain reference to block by calling
+ * tcf_block_refcnt_get().
+ */
+ refcount_inc(&block->refcnt);
}
return block;
+
+errout_rcu:
+ rcu_read_unlock();
+errout_qdisc:
+ if (*q) {
+ qdisc_put(*q);
+ *q = NULL;
+ }
+ return ERR_PTR(err);
+}
+
+static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
+{
+ if (!IS_ERR_OR_NULL(block))
+ tcf_block_refcnt_put(block);
+
+ if (q)
+ qdisc_put(q);
}
struct tcf_block_owner_item {
@@ -664,21 +798,16 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
{
struct net *net = qdisc_net(q);
struct tcf_block *block = NULL;
- bool created = false;
int err;
- if (ei->block_index) {
+ if (ei->block_index)
/* block_index not 0 means the shared block is requested */
- block = tcf_block_lookup(net, ei->block_index);
- if (block)
- block->refcnt++;
- }
+ block = tcf_block_refcnt_get(net, ei->block_index);
if (!block) {
block = tcf_block_create(net, q, ei->block_index, extack);
if (IS_ERR(block))
return PTR_ERR(block);
- created = true;
if (tcf_block_shared(block)) {
err = tcf_block_insert(block, net, extack);
if (err)
@@ -708,14 +837,8 @@ err_block_offload_bind:
err_chain0_head_change_cb_add:
tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
- if (created) {
- if (tcf_block_shared(block))
- tcf_block_remove(block, net);
err_block_insert:
- kfree(block);
- } else {
- block->refcnt--;
- }
+ tcf_block_refcnt_put(block);
return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);
@@ -747,42 +870,12 @@ EXPORT_SYMBOL(tcf_block_get);
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei)
{
- struct tcf_chain *chain, *tmp;
-
if (!block)
return;
tcf_chain0_head_change_cb_del(block, ei);
tcf_block_owner_del(block, q, ei->binder_type);
- if (block->refcnt == 1) {
- if (tcf_block_shared(block))
- tcf_block_remove(block, block->net);
-
- /* Hold a refcnt for all chains, so that they don't disappear
- * while we are iterating.
- */
- list_for_each_entry(chain, &block->chain_list, list)
- tcf_chain_hold(chain);
-
- list_for_each_entry(chain, &block->chain_list, list)
- tcf_chain_flush(chain);
- }
-
- tcf_block_offload_unbind(block, q, ei);
-
- if (block->refcnt == 1) {
- /* At this point, all the chains should have refcnt >= 1. */
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
- tcf_chain_put_explicitly_created(chain);
- tcf_chain_put(chain);
- }
-
- block->refcnt--;
- if (list_empty(&block->chain_list))
- kfree(block);
- } else {
- block->refcnt--;
- }
+ __tcf_block_put(block, q, ei);
}
EXPORT_SYMBOL(tcf_block_put_ext);
@@ -1332,6 +1425,7 @@ replay:
errout:
if (chain)
tcf_chain_put(chain);
+ tcf_block_release(q, block);
if (err == -EAGAIN)
/* Replay the request. */
goto replay;
@@ -1453,6 +1547,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
errout:
if (chain)
tcf_chain_put(chain);
+ tcf_block_release(q, block);
return err;
}
@@ -1538,6 +1633,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
errout:
if (chain)
tcf_chain_put(chain);
+ tcf_block_release(q, block);
return err;
}
@@ -1631,12 +1727,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if (nlmsg_len(cb->nlh) < sizeof(*tcm))
return skb->len;
- err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+ err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL,
+ cb->extack);
if (err)
return err;
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
- block = tcf_block_lookup(net, tcm->tcm_block_index);
+ block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
if (!block)
goto out;
/* If we work with block index, q is NULL and parent value
@@ -1695,6 +1792,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
}
}
+ if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+ tcf_block_refcnt_put(block);
cb->args[0] = index;
out:
@@ -1854,7 +1953,8 @@ replay:
chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
if (chain_index > TC_ACT_EXT_VAL_MASK) {
NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout_block;
}
chain = tcf_chain_lookup(block, chain_index);
if (n->nlmsg_type == RTM_NEWCHAIN) {
@@ -1866,23 +1966,27 @@ replay:
tcf_chain_hold(chain);
} else {
NL_SET_ERR_MSG(extack, "Filter chain already exists");
- return -EEXIST;
+ err = -EEXIST;
+ goto errout_block;
}
} else {
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
- return -ENOENT;
+ err = -ENOENT;
+ goto errout_block;
}
chain = tcf_chain_create(block, chain_index);
if (!chain) {
NL_SET_ERR_MSG(extack, "Failed to create filter chain");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto errout_block;
}
}
} else {
if (!chain || tcf_chain_held_by_acts_only(chain)) {
NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout_block;
}
tcf_chain_hold(chain);
}
@@ -1926,6 +2030,8 @@ replay:
errout:
tcf_chain_put(chain);
+errout_block:
+ tcf_block_release(q, block);
if (err == -EAGAIN)
/* Replay the request. */
goto replay;
@@ -1949,12 +2055,13 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
if (nlmsg_len(cb->nlh) < sizeof(*tcm))
return skb->len;
- err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+ err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL,
+ cb->extack);
if (err)
return err;
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
- block = tcf_block_lookup(net, tcm->tcm_block_index);
+ block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
if (!block)
goto out;
/* If we work with block index, q is NULL and parent value
@@ -2021,6 +2128,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
index++;
}
+ if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+ tcf_block_refcnt_put(block);
cb->args[0] = index;
out:
@@ -2213,6 +2322,7 @@ static __net_init int tcf_net_init(struct net *net)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
+ spin_lock_init(&tn->idr_lock);
idr_init(&tn->idr);
return 0;
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 6fd9bdd93796..9aada2d0ef06 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -98,7 +98,7 @@ struct cls_fl_filter {
struct list_head list;
u32 handle;
u32 flags;
- unsigned int in_hw_count;
+ u32 in_hw_count;
struct rcu_work rwork;
struct net_device *hw_dev;
};
@@ -993,7 +993,7 @@ static int fl_init_mask_hashtable(struct fl_flow_mask *mask)
}
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
-#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
+#define FL_KEY_MEMBER_SIZE(member) FIELD_SIZEOF(struct fl_flow_key, member)
#define FL_KEY_IS_MASKED(mask, member) \
memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \
@@ -1880,6 +1880,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_FLOWER_IN_HW_COUNT, f->in_hw_count))
+ goto nla_put_failure;
+
if (tcf_exts_dump(skb, &f->exts))
goto nla_put_failure;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b2c3406a2cf2..4b28fd44576d 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -68,7 +68,6 @@ struct tc_u_knode {
u32 mask;
u32 __percpu *pcpu_success;
#endif
- struct tcf_proto *tp;
struct rcu_work rwork;
/* The 'sel' field MUST be the last field in structure to allow for
* tc_u32_keys allocated at end of structure.
@@ -80,10 +79,10 @@ struct tc_u_hnode {
struct tc_u_hnode __rcu *next;
u32 handle;
u32 prio;
- struct tc_u_common *tp_c;
int refcnt;
unsigned int divisor;
struct idr handle_idr;
+ bool is_root;
struct rcu_head rcu;
u32 flags;
/* The 'ht' field MUST be the last field in structure to allow for
@@ -98,7 +97,7 @@ struct tc_u_common {
int refcnt;
struct idr handle_idr;
struct hlist_node hnode;
- struct rcu_head rcu;
+ long knodes;
};
static inline unsigned int u32_hash_fold(__be32 key,
@@ -344,19 +343,16 @@ static void *tc_u_common_ptr(const struct tcf_proto *tp)
return block->q;
}
-static unsigned int tc_u_hash(const struct tcf_proto *tp)
+static struct hlist_head *tc_u_hash(void *key)
{
- return hash_ptr(tc_u_common_ptr(tp), U32_HASH_SHIFT);
+ return tc_u_common_hash + hash_ptr(key, U32_HASH_SHIFT);
}
-static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
+static struct tc_u_common *tc_u_common_find(void *key)
{
struct tc_u_common *tc;
- unsigned int h;
-
- h = tc_u_hash(tp);
- hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
- if (tc->ptr == tc_u_common_ptr(tp))
+ hlist_for_each_entry(tc, tc_u_hash(key), hnode) {
+ if (tc->ptr == key)
return tc;
}
return NULL;
@@ -365,10 +361,8 @@ static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
static int u32_init(struct tcf_proto *tp)
{
struct tc_u_hnode *root_ht;
- struct tc_u_common *tp_c;
- unsigned int h;
-
- tp_c = tc_u_common_find(tp);
+ void *key = tc_u_common_ptr(tp);
+ struct tc_u_common *tp_c = tc_u_common_find(key);
root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
if (root_ht == NULL)
@@ -377,6 +371,7 @@ static int u32_init(struct tcf_proto *tp)
root_ht->refcnt++;
root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
root_ht->prio = tp->prio;
+ root_ht->is_root = true;
idr_init(&root_ht->handle_idr);
if (tp_c == NULL) {
@@ -385,18 +380,16 @@ static int u32_init(struct tcf_proto *tp)
kfree(root_ht);
return -ENOBUFS;
}
- tp_c->ptr = tc_u_common_ptr(tp);
+ tp_c->ptr = key;
INIT_HLIST_NODE(&tp_c->hnode);
idr_init(&tp_c->handle_idr);
- h = tc_u_hash(tp);
- hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
+ hlist_add_head(&tp_c->hnode, tc_u_hash(key));
}
tp_c->refcnt++;
RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
rcu_assign_pointer(tp_c->hlist, root_ht);
- root_ht->tp_c = tp_c;
root_ht->refcnt++;
rcu_assign_pointer(tp->root, root_ht);
@@ -404,8 +397,7 @@ static int u32_init(struct tcf_proto *tp)
return 0;
}
-static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
- bool free_pf)
+static int u32_destroy_key(struct tc_u_knode *n, bool free_pf)
{
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
@@ -439,7 +431,7 @@ static void u32_delete_key_work(struct work_struct *work)
struct tc_u_knode,
rwork);
rtnl_lock();
- u32_destroy_key(key->tp, key, false);
+ u32_destroy_key(key, false);
rtnl_unlock();
}
@@ -456,12 +448,13 @@ static void u32_delete_key_freepf_work(struct work_struct *work)
struct tc_u_knode,
rwork);
rtnl_lock();
- u32_destroy_key(key->tp, key, true);
+ u32_destroy_key(key, true);
rtnl_unlock();
}
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
{
+ struct tc_u_common *tp_c = tp->data;
struct tc_u_knode __rcu **kp;
struct tc_u_knode *pkp;
struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
@@ -472,6 +465,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
if (pkp == key) {
RCU_INIT_POINTER(*kp, key->next);
+ tp_c->knodes--;
tcf_unbind_filter(tp, &key->res);
idr_remove(&ht->handle_idr, key->handle);
@@ -586,6 +580,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
struct netlink_ext_ack *extack)
{
+ struct tc_u_common *tp_c = tp->data;
struct tc_u_knode *n;
unsigned int h;
@@ -593,13 +588,14 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
RCU_INIT_POINTER(ht->ht[h],
rtnl_dereference(n->next));
+ tp_c->knodes--;
tcf_unbind_filter(tp, &n->res);
u32_remove_hw_knode(tp, n, extack);
idr_remove(&ht->handle_idr, n->handle);
if (tcf_exts_get_net(&n->exts))
tcf_queue_work(&n->rwork, u32_delete_key_freepf_work);
else
- u32_destroy_key(n->tp, n, true);
+ u32_destroy_key(n, true);
}
}
}
@@ -632,17 +628,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
return -ENOENT;
}
-static bool ht_empty(struct tc_u_hnode *ht)
-{
- unsigned int h;
-
- for (h = 0; h <= ht->divisor; h++)
- if (rcu_access_pointer(ht->ht[h]))
- return false;
-
- return true;
-}
-
static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
@@ -680,20 +665,16 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
struct netlink_ext_ack *extack)
{
struct tc_u_hnode *ht = arg;
- struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
struct tc_u_common *tp_c = tp->data;
int ret = 0;
- if (ht == NULL)
- goto out;
-
if (TC_U32_KEY(ht->handle)) {
u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
goto out;
}
- if (root_ht == ht) {
+ if (ht->is_root) {
NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node");
return -EINVAL;
}
@@ -706,38 +687,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
}
out:
- *last = true;
- if (root_ht) {
- if (root_ht->refcnt > 2) {
- *last = false;
- goto ret;
- }
- if (root_ht->refcnt == 2) {
- if (!ht_empty(root_ht)) {
- *last = false;
- goto ret;
- }
- }
- }
-
- if (tp_c->refcnt > 1) {
- *last = false;
- goto ret;
- }
-
- if (tp_c->refcnt == 1) {
- struct tc_u_hnode *ht;
-
- for (ht = rtnl_dereference(tp_c->hlist);
- ht;
- ht = rtnl_dereference(ht->next))
- if (!ht_empty(ht)) {
- *last = false;
- break;
- }
- }
-
-ret:
+ *last = tp_c->refcnt == 1 && tp_c->knodes == 0;
return ret;
}
@@ -768,7 +718,7 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
};
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
- unsigned long base, struct tc_u_hnode *ht,
+ unsigned long base,
struct tc_u_knode *n, struct nlattr **tb,
struct nlattr *est, bool ovr,
struct netlink_ext_ack *extack)
@@ -789,12 +739,16 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
}
if (handle) {
- ht_down = u32_lookup_ht(ht->tp_c, handle);
+ ht_down = u32_lookup_ht(tp->data, handle);
if (!ht_down) {
NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
return -EINVAL;
}
+ if (ht_down->is_root) {
+ NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
+ return -EINVAL;
+ }
ht_down->refcnt++;
}
@@ -891,7 +845,6 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
/* Similarly success statistics must be moved as pointers */
new->pcpu_success = n->pcpu_success;
#endif
- new->tp = tp;
memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
@@ -960,18 +913,17 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (!new)
return -ENOMEM;
- err = u32_set_parms(net, tp, base,
- rtnl_dereference(n->ht_up), new, tb,
+ err = u32_set_parms(net, tp, base, new, tb,
tca[TCA_RATE], ovr, extack);
if (err) {
- u32_destroy_key(tp, new, false);
+ u32_destroy_key(new, false);
return err;
}
err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
- u32_destroy_key(tp, new, false);
+ u32_destroy_key(new, false);
return err;
}
@@ -988,7 +940,11 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (tb[TCA_U32_DIVISOR]) {
unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
- if (--divisor > 0x100) {
+ if (!is_power_of_2(divisor)) {
+ NL_SET_ERR_MSG_MOD(extack, "Divisor is not a power of 2");
+ return -EINVAL;
+ }
+ if (divisor-- > 0x100) {
NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
return -EINVAL;
}
@@ -1013,7 +969,6 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err;
}
}
- ht->tp_c = tp_c;
ht->refcnt = 1;
ht->divisor = divisor;
ht->handle = handle;
@@ -1103,7 +1058,6 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
n->flags = flags;
- n->tp = tp;
err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
if (err < 0)
@@ -1125,7 +1079,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr,
+ err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
extack);
if (err == 0) {
struct tc_u_knode __rcu **ins;
@@ -1146,6 +1100,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(n->next, pins);
rcu_assign_pointer(*ins, n);
+ tp_c->knodes++;
*arg = n;
return 0;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 85e73f48e48f..cf5c714ae786 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -27,7 +27,6 @@
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
-#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/hashtable.h>
@@ -315,6 +314,24 @@ out:
return q;
}
+struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
+{
+ struct netdev_queue *nq;
+ struct Qdisc *q;
+
+ if (!handle)
+ return NULL;
+ q = qdisc_match_from_root(dev->qdisc, handle);
+ if (q)
+ goto out;
+
+ nq = dev_ingress_queue_rcu(dev);
+ if (nq)
+ q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
+out:
+ return q;
+}
+
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
@@ -921,7 +938,7 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb,
qdisc_notify(net, skb, n, clid, old, new);
if (old)
- qdisc_destroy(old);
+ qdisc_put(old);
}
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
@@ -974,7 +991,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
qdisc_refcount_inc(new);
if (!ingress)
- qdisc_destroy(old);
+ qdisc_put(old);
}
skip:
@@ -1053,10 +1070,6 @@ static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
return 0;
}
-/* lockdep annotation is needed for ingress; egress gets it only for name */
-static struct lock_class_key qdisc_tx_lock;
-static struct lock_class_key qdisc_rx_lock;
-
/*
Allocate and initialize new qdisc.
@@ -1121,7 +1134,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
if (handle == TC_H_INGRESS) {
sch->flags |= TCQ_F_INGRESS;
handle = TC_H_MAKE(TC_H_INGRESS, 0);
- lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
} else {
if (handle == 0) {
handle = qdisc_alloc_handle(dev);
@@ -1129,7 +1141,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
if (handle == 0)
goto err_out3;
}
- lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
if (!netif_is_multiqueue(dev))
sch->flags |= TCQ_F_ONETXQUEUE;
}
@@ -1582,7 +1593,7 @@ graft:
err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
if (err) {
if (q)
- qdisc_destroy(q);
+ qdisc_put(q);
return err;
}
@@ -1660,7 +1671,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
ASSERT_RTNL();
err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
- rtm_tca_policy, NULL);
+ rtm_tca_policy, cb->extack);
if (err < 0)
return err;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index cd49afca9617..d714d3747bcb 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -150,7 +150,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
pr_debug("atm_tc_put: destroying\n");
list_del_init(&flow->list);
pr_debug("atm_tc_put: qdisc %p\n", flow->q);
- qdisc_destroy(flow->q);
+ qdisc_put(flow->q);
tcf_block_put(flow->block);
if (flow->sock) {
pr_debug("atm_tc_put: f_count %ld\n",
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 793016d722ec..b910cd5c56f7 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -812,7 +812,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow)
if (skb) {
flow->head = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
}
return skb;
@@ -1252,7 +1252,7 @@ found:
else
flow->head = elig_ack->next;
- elig_ack->next = NULL;
+ skb_mark_not_on_list(elig_ack);
return elig_ack;
}
@@ -1675,7 +1675,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
while (segs) {
nskb = segs->next;
- segs->next = NULL;
+ skb_mark_not_on_list(segs);
qdisc_skb_cb(segs)->pkt_len = segs->len;
cobalt_set_enqueue_time(segs, now);
get_cobalt_cb(segs)->adjusted_len = cake_overhead(q,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index f42025d53cfe..4dc05409e3fb 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1418,7 +1418,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
WARN_ON(cl->filters);
tcf_block_put(cl->block);
- qdisc_destroy(cl->q);
+ qdisc_put(cl->q);
qdisc_put_rtab(cl->R_tab);
gen_kill_estimator(&cl->rate_est);
if (cl != &q->link)
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index e26a24017faa..e689e11b6d0f 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -379,7 +379,7 @@ static void cbs_destroy(struct Qdisc *sch)
cbs_disable_offload(dev, q);
if (q->qdisc)
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
}
static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e0b0cf8a9939..cdebaed0f8cf 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -134,7 +134,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
tca[TCA_RATE]);
if (err) {
NL_SET_ERR_MSG(extack, "Failed to replace estimator");
- qdisc_destroy(cl->qdisc);
+ qdisc_put(cl->qdisc);
kfree(cl);
return err;
}
@@ -153,7 +153,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
{
gen_kill_estimator(&cl->rate_est);
- qdisc_destroy(cl->qdisc);
+ qdisc_put(cl->qdisc);
kfree(cl);
}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 049714c57075..f6f480784bc6 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -412,7 +412,7 @@ static void dsmark_destroy(struct Qdisc *sch)
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
tcf_block_put(p->block);
- qdisc_destroy(p->q);
+ qdisc_put(p->q);
if (p->mv != p->embedded)
kfree(p->mv);
}
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 24893d3b5d22..3809c9bf8896 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -177,7 +177,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
if (q) {
err = fifo_set_limit(q, limit);
if (err < 0) {
- qdisc_destroy(q);
+ qdisc_put(q);
q = NULL;
}
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4808713c73b9..338222a6c664 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -106,7 +106,6 @@ struct fq_sched_data {
u64 stat_gc_flows;
u64 stat_internal_packets;
- u64 stat_tcp_retrans;
u64 stat_throttled;
u64 stat_flows_plimit;
u64 stat_pkts_too_long;
@@ -319,7 +318,7 @@ static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
if (skb) {
flow->head = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
flow->qlen--;
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
@@ -327,62 +326,17 @@ static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
return skb;
}
-/* We might add in the future detection of retransmits
- * For the time being, just return false
- */
-static bool skb_is_retransmit(struct sk_buff *skb)
-{
- return false;
-}
-
-/* add skb to flow queue
- * flow queue is a linked list, kind of FIFO, except for TCP retransmits
- * We special case tcp retransmits to be transmitted before other packets.
- * We rely on fact that TCP retransmits are unlikely, so we do not waste
- * a separate queue or a pointer.
- * head-> [retrans pkt 1]
- * [retrans pkt 2]
- * [ normal pkt 1]
- * [ normal pkt 2]
- * [ normal pkt 3]
- * tail-> [ normal pkt 4]
- */
static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
{
- struct sk_buff *prev, *head = flow->head;
+ struct sk_buff *head = flow->head;
skb->next = NULL;
- if (!head) {
+ if (!head)
flow->head = skb;
- flow->tail = skb;
- return;
- }
- if (likely(!skb_is_retransmit(skb))) {
+ else
flow->tail->next = skb;
- flow->tail = skb;
- return;
- }
- /* This skb is a tcp retransmit,
- * find the last retrans packet in the queue
- */
- prev = NULL;
- while (skb_is_retransmit(head)) {
- prev = head;
- head = head->next;
- if (!head)
- break;
- }
- if (!prev) { /* no rtx packet in queue, become the new head */
- skb->next = flow->head;
- flow->head = skb;
- } else {
- if (prev == flow->tail)
- flow->tail = skb;
- else
- skb->next = prev->next;
- prev->next = skb;
- }
+ flow->tail = skb;
}
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -401,8 +355,6 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
f->qlen++;
- if (skb_is_retransmit(skb))
- q->stat_tcp_retrans++;
qdisc_qstats_backlog_inc(sch, skb);
if (fq_flow_is_detached(f)) {
struct sock *sk = skb->sk;
@@ -491,11 +443,16 @@ begin:
}
skb = f->head;
- if (unlikely(skb && now < f->time_next_packet &&
- !skb_is_tcp_pure_ack(skb))) {
- head->first = f->next;
- fq_flow_set_throttled(q, f);
- goto begin;
+ if (skb && !skb_is_tcp_pure_ack(skb)) {
+ u64 time_next_packet = max_t(u64, ktime_to_ns(skb->tstamp),
+ f->time_next_packet);
+
+ if (now < time_next_packet) {
+ head->first = f->next;
+ f->time_next_packet = time_next_packet;
+ fq_flow_set_throttled(q, f);
+ goto begin;
+ }
}
skb = fq_dequeue_head(sch, f);
@@ -513,11 +470,7 @@ begin:
prefetch(&skb->end);
f->credit -= qdisc_pkt_len(skb);
- if (!q->rate_enable)
- goto out;
-
- /* Do not pace locally generated ack packets */
- if (skb_is_tcp_pure_ack(skb))
+ if (ktime_to_ns(skb->tstamp) || !q->rate_enable)
goto out;
rate = q->flow_max_rate;
@@ -823,7 +776,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
q->fq_trees_log = ilog2(1024);
q->orphan_mask = 1024 - 1;
q->low_rate_threshold = 550000 / 8;
- qdisc_watchdog_init(&q->watchdog, sch);
+ qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC);
if (opt)
err = fq_change(sch, opt, extack);
@@ -873,7 +826,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.gc_flows = q->stat_gc_flows;
st.highprio_packets = q->stat_internal_packets;
- st.tcp_retrans = q->stat_tcp_retrans;
+ st.tcp_retrans = 0;
st.throttled = q->stat_throttled;
st.flows_plimit = q->stat_flows_plimit;
st.pkts_too_long = q->stat_pkts_too_long;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 6c0a9d5dbf94..cd04d40c30b6 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -124,7 +124,7 @@ static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow)
struct sk_buff *skb = flow->head;
flow->head = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
return skb;
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 69078c82963e..de1663f7d3ad 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -184,7 +184,7 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
skb = nskb;
(*packets)++; /* GSO counts as one pkt */
}
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
}
/* This variant of try_bulk_dequeue_skb() makes sure
@@ -210,7 +210,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
skb = nskb;
} while (++cnt < 8);
(*packets) += cnt;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
}
/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
@@ -572,6 +572,18 @@ struct Qdisc noop_qdisc = {
.dev_queue = &noop_netdev_queue,
.running = SEQCNT_ZERO(noop_qdisc.running),
.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
+ .gso_skb = {
+ .next = (struct sk_buff *)&noop_qdisc.gso_skb,
+ .prev = (struct sk_buff *)&noop_qdisc.gso_skb,
+ .qlen = 0,
+ .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.gso_skb.lock),
+ },
+ .skb_bad_txq = {
+ .next = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
+ .prev = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
+ .qlen = 0,
+ .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock),
+ },
};
EXPORT_SYMBOL(noop_qdisc);
@@ -901,7 +913,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
if (!ops->init || ops->init(sch, NULL, extack) == 0)
return sch;
- qdisc_destroy(sch);
+ qdisc_put(sch);
return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);
@@ -941,15 +953,18 @@ void qdisc_free(struct Qdisc *qdisc)
kfree((char *) qdisc - qdisc->padded);
}
-void qdisc_destroy(struct Qdisc *qdisc)
+static void qdisc_free_cb(struct rcu_head *head)
+{
+ struct Qdisc *q = container_of(head, struct Qdisc, rcu);
+
+ qdisc_free(q);
+}
+
+static void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
struct sk_buff *skb, *tmp;
- if (qdisc->flags & TCQ_F_BUILTIN ||
- !refcount_dec_and_test(&qdisc->refcnt))
- return;
-
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@@ -974,9 +989,34 @@ void qdisc_destroy(struct Qdisc *qdisc)
kfree_skb_list(skb);
}
- qdisc_free(qdisc);
+ call_rcu(&qdisc->rcu, qdisc_free_cb);
+}
+
+void qdisc_put(struct Qdisc *qdisc)
+{
+ if (qdisc->flags & TCQ_F_BUILTIN ||
+ !refcount_dec_and_test(&qdisc->refcnt))
+ return;
+
+ qdisc_destroy(qdisc);
+}
+EXPORT_SYMBOL(qdisc_put);
+
+/* Version of qdisc_put() that is called with rtnl mutex unlocked.
+ * Intended to be used as optimization, this function only takes rtnl lock if
+ * qdisc reference counter reached zero.
+ */
+
+void qdisc_put_unlocked(struct Qdisc *qdisc)
+{
+ if (qdisc->flags & TCQ_F_BUILTIN ||
+ !refcount_dec_and_rtnl_lock(&qdisc->refcnt))
+ return;
+
+ qdisc_destroy(qdisc);
+ rtnl_unlock();
}
-EXPORT_SYMBOL(qdisc_destroy);
+EXPORT_SYMBOL(qdisc_put_unlocked);
/* Attach toplevel qdisc to device queue. */
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
@@ -1245,8 +1285,6 @@ static void dev_init_scheduler_queue(struct net_device *dev,
rcu_assign_pointer(dev_queue->qdisc, qdisc);
dev_queue->qdisc_sleeping = qdisc;
- __skb_queue_head_init(&qdisc->gso_skb);
- __skb_queue_head_init(&qdisc->skb_bad_txq);
}
void dev_init_scheduler(struct net_device *dev)
@@ -1270,7 +1308,7 @@ static void shutdown_scheduler_queue(struct net_device *dev,
rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
dev_queue->qdisc_sleeping = qdisc_default;
- qdisc_destroy(qdisc);
+ qdisc_put(qdisc);
}
}
@@ -1279,7 +1317,7 @@ void dev_shutdown(struct net_device *dev)
netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
- qdisc_destroy(dev->qdisc);
+ qdisc_put(dev->qdisc);
dev->qdisc = &noop_qdisc;
WARN_ON(timer_pending(&dev->watchdog_timer));
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3278a76f6861..b18ec1f6de60 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1092,7 +1092,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
struct hfsc_sched *q = qdisc_priv(sch);
tcf_block_put(cl->block);
- qdisc_destroy(cl->qdisc);
+ qdisc_put(cl->qdisc);
gen_kill_estimator(&cl->rate_est);
if (cl != &q->root)
kfree(cl);
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index c3a8388dcdf6..9d6a47697406 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -330,7 +330,7 @@ static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket)
struct sk_buff *skb = bucket->head;
bucket->head = skb->next;
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
return skb;
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 43c4bfe625a9..58b449490757 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -132,7 +132,7 @@ struct htb_class {
struct htb_class_inner {
struct htb_prio clprio[TC_HTB_NUMPRIO];
} inner;
- } un;
+ };
s64 pq_key;
int prio_activity; /* for which prios are we active */
@@ -411,13 +411,13 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
int prio = ffz(~m);
m &= ~(1 << prio);
- if (p->un.inner.clprio[prio].feed.rb_node)
+ if (p->inner.clprio[prio].feed.rb_node)
/* parent already has its feed in use so that
* reset bit in mask as parent is already ok
*/
mask &= ~(1 << prio);
- htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio);
+ htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
}
p->prio_activity |= mask;
cl = p;
@@ -447,19 +447,19 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
int prio = ffz(~m);
m &= ~(1 << prio);
- if (p->un.inner.clprio[prio].ptr == cl->node + prio) {
+ if (p->inner.clprio[prio].ptr == cl->node + prio) {
/* we are removing child which is pointed to from
* parent feed - forget the pointer but remember
* classid
*/
- p->un.inner.clprio[prio].last_ptr_id = cl->common.classid;
- p->un.inner.clprio[prio].ptr = NULL;
+ p->inner.clprio[prio].last_ptr_id = cl->common.classid;
+ p->inner.clprio[prio].ptr = NULL;
}
htb_safe_rb_erase(cl->node + prio,
- &p->un.inner.clprio[prio].feed);
+ &p->inner.clprio[prio].feed);
- if (!p->un.inner.clprio[prio].feed.rb_node)
+ if (!p->inner.clprio[prio].feed.rb_node)
mask |= 1 << prio;
}
@@ -555,7 +555,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
*/
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
- WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
+ WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
if (!cl->prio_activity) {
cl->prio_activity = 1 << cl->prio;
@@ -577,22 +577,6 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
cl->prio_activity = 0;
}
-static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
- struct qdisc_skb_head *qh)
-{
- struct sk_buff *last = qh->tail;
-
- if (last) {
- skb->next = NULL;
- last->next = skb;
- qh->tail = skb;
- } else {
- qh->tail = skb;
- qh->head = skb;
- }
- qh->qlen++;
-}
-
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
@@ -603,7 +587,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (cl == HTB_DIRECT) {
/* enqueue to helper queue */
if (q->direct_queue.qlen < q->direct_qlen) {
- htb_enqueue_tail(skb, sch, &q->direct_queue);
+ __qdisc_enqueue_tail(skb, &q->direct_queue);
q->direct_pkts++;
} else {
return qdisc_drop(skb, sch, to_free);
@@ -615,7 +599,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
__qdisc_drop(skb, to_free);
return ret;
#endif
- } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q,
+ } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
to_free)) != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret)) {
qdisc_qstats_drop(sch);
@@ -823,7 +807,7 @@ static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
if (!cl->level)
return cl;
- clp = &cl->un.inner.clprio[prio];
+ clp = &cl->inner.clprio[prio];
(++sp)->root = clp->feed.rb_node;
sp->pptr = &clp->ptr;
sp->pid = &clp->last_ptr_id;
@@ -857,7 +841,7 @@ next:
* graft operation on the leaf since last dequeue;
* simply deactivate and skip such class
*/
- if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
+ if (unlikely(cl->leaf.q->q.qlen == 0)) {
struct htb_class *next;
htb_deactivate(q, cl);
@@ -873,12 +857,12 @@ next:
goto next;
}
- skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
+ skb = cl->leaf.q->dequeue(cl->leaf.q);
if (likely(skb != NULL))
break;
- qdisc_warn_nonwc("htb", cl->un.leaf.q);
- htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr:
+ qdisc_warn_nonwc("htb", cl->leaf.q);
+ htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
&q->hlevel[0].hprio[prio].ptr);
cl = htb_lookup_leaf(hprio, prio);
@@ -886,16 +870,16 @@ next:
if (likely(skb != NULL)) {
bstats_update(&cl->bstats, skb);
- cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
- if (cl->un.leaf.deficit[level] < 0) {
- cl->un.leaf.deficit[level] += cl->quantum;
- htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr :
+ cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
+ if (cl->leaf.deficit[level] < 0) {
+ cl->leaf.deficit[level] += cl->quantum;
+ htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
&q->hlevel[0].hprio[prio].ptr);
}
/* this used to be after charge_class but this constelation
* gives us slightly better performance
*/
- if (!cl->un.leaf.q->q.qlen)
+ if (!cl->leaf.q->q.qlen)
htb_deactivate(q, cl);
htb_charge_class(q, cl, level, skb);
}
@@ -972,10 +956,10 @@ static void htb_reset(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
if (cl->level)
- memset(&cl->un.inner, 0, sizeof(cl->un.inner));
+ memset(&cl->inner, 0, sizeof(cl->inner));
else {
- if (cl->un.leaf.q)
- qdisc_reset(cl->un.leaf.q);
+ if (cl->leaf.q)
+ qdisc_reset(cl->leaf.q);
}
cl->prio_activity = 0;
cl->cmode = HTB_CAN_SEND;
@@ -1098,8 +1082,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
*/
tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
- if (!cl->level && cl->un.leaf.q)
- tcm->tcm_info = cl->un.leaf.q->handle;
+ if (!cl->level && cl->leaf.q)
+ tcm->tcm_info = cl->leaf.q->handle;
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
@@ -1142,9 +1126,9 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
};
__u32 qlen = 0;
- if (!cl->level && cl->un.leaf.q) {
- qlen = cl->un.leaf.q->q.qlen;
- qs.backlog = cl->un.leaf.q->qstats.backlog;
+ if (!cl->level && cl->leaf.q) {
+ qlen = cl->leaf.q->q.qlen;
+ qs.backlog = cl->leaf.q->qstats.backlog;
}
cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
INT_MIN, INT_MAX);
@@ -1172,14 +1156,14 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
cl->common.classid, extack)) == NULL)
return -ENOBUFS;
- *old = qdisc_replace(sch, new, &cl->un.leaf.q);
+ *old = qdisc_replace(sch, new, &cl->leaf.q);
return 0;
}
static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
struct htb_class *cl = (struct htb_class *)arg;
- return !cl->level ? cl->un.leaf.q : NULL;
+ return !cl->level ? cl->leaf.q : NULL;
}
static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
@@ -1205,15 +1189,15 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
{
struct htb_class *parent = cl->parent;
- WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
+ WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
if (parent->cmode != HTB_CAN_SEND)
htb_safe_rb_erase(&parent->pq_node,
&q->hlevel[parent->level].wait_pq);
parent->level = 0;
- memset(&parent->un.inner, 0, sizeof(parent->un.inner));
- parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
+ memset(&parent->inner, 0, sizeof(parent->inner));
+ parent->leaf.q = new_q ? new_q : &noop_qdisc;
parent->tokens = parent->buffer;
parent->ctokens = parent->cbuffer;
parent->t_c = ktime_get_ns();
@@ -1223,8 +1207,8 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
if (!cl->level) {
- WARN_ON(!cl->un.leaf.q);
- qdisc_destroy(cl->un.leaf.q);
+ WARN_ON(!cl->leaf.q);
+ qdisc_put(cl->leaf.q);
}
gen_kill_estimator(&cl->rate_est);
tcf_block_put(cl->block);
@@ -1286,11 +1270,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
if (!cl->level) {
- unsigned int qlen = cl->un.leaf.q->q.qlen;
- unsigned int backlog = cl->un.leaf.q->qstats.backlog;
+ unsigned int qlen = cl->leaf.q->q.qlen;
+ unsigned int backlog = cl->leaf.q->qstats.backlog;
- qdisc_reset(cl->un.leaf.q);
- qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog);
+ qdisc_reset(cl->leaf.q);
+ qdisc_tree_reduce_backlog(cl->leaf.q, qlen, backlog);
}
/* delete from hash and active; remainder in destroy_class */
@@ -1419,13 +1403,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
classid, NULL);
sch_tree_lock(sch);
if (parent && !parent->level) {
- unsigned int qlen = parent->un.leaf.q->q.qlen;
- unsigned int backlog = parent->un.leaf.q->qstats.backlog;
+ unsigned int qlen = parent->leaf.q->q.qlen;
+ unsigned int backlog = parent->leaf.q->qstats.backlog;
/* turn parent into inner node */
- qdisc_reset(parent->un.leaf.q);
- qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
- qdisc_destroy(parent->un.leaf.q);
+ qdisc_reset(parent->leaf.q);
+ qdisc_tree_reduce_backlog(parent->leaf.q, qlen, backlog);
+ qdisc_put(parent->leaf.q);
if (parent->prio_activity)
htb_deactivate(q, parent);
@@ -1436,10 +1420,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
}
parent->level = (parent->parent ? parent->parent->level
: TC_HTB_MAXDEPTH) - 1;
- memset(&parent->un.inner, 0, sizeof(parent->un.inner));
+ memset(&parent->inner, 0, sizeof(parent->inner));
}
/* leaf (we) needs elementary qdisc */
- cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
+ cl->leaf.q = new_q ? new_q : &noop_qdisc;
cl->common.classid = classid;
cl->parent = parent;
@@ -1455,8 +1439,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
qdisc_class_hash_insert(&q->clhash, &cl->common);
if (parent)
parent->children++;
- if (cl->un.leaf.q != &noop_qdisc)
- qdisc_hash_add(cl->un.leaf.q, true);
+ if (cl->leaf.q != &noop_qdisc)
+ qdisc_hash_add(cl->leaf.q, true);
} else {
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, NULL,
@@ -1478,7 +1462,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
/* it used to be a nasty bug here, we have to check that node
- * is really leaf before changing cl->un.leaf !
+ * is really leaf before changing cl->leaf !
*/
if (!cl->level) {
u64 quantum = cl->rate.rate_bytes_ps;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index d6b8ae4ed7a3..f20f3a0f8424 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -65,7 +65,7 @@ static void mq_destroy(struct Qdisc *sch)
if (!priv->qdiscs)
return;
for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
- qdisc_destroy(priv->qdiscs[ntx]);
+ qdisc_put(priv->qdiscs[ntx]);
kfree(priv->qdiscs);
}
@@ -119,7 +119,7 @@ static void mq_attach(struct Qdisc *sch)
qdisc = priv->qdiscs[ntx];
old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
if (old)
- qdisc_destroy(old);
+ qdisc_put(old);
#ifdef CONFIG_NET_SCHED
if (ntx < dev->real_num_tx_queues)
qdisc_hash_add(qdisc, false);
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 0e9d761cdd80..d364e63c396d 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -40,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch)
for (ntx = 0;
ntx < dev->num_tx_queues && priv->qdiscs[ntx];
ntx++)
- qdisc_destroy(priv->qdiscs[ntx]);
+ qdisc_put(priv->qdiscs[ntx]);
kfree(priv->qdiscs);
}
@@ -300,7 +300,7 @@ static void mqprio_attach(struct Qdisc *sch)
qdisc = priv->qdiscs[ntx];
old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
if (old)
- qdisc_destroy(old);
+ qdisc_put(old);
if (ntx < dev->real_num_tx_queues)
qdisc_hash_add(qdisc, false);
}
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 1da7ea8de0ad..7410ce4d0321 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -175,7 +175,7 @@ multiq_destroy(struct Qdisc *sch)
tcf_block_put(q->block);
for (band = 0; band < q->bands; band++)
- qdisc_destroy(q->queues[band]);
+ qdisc_put(q->queues[band]);
kfree(q->queues);
}
@@ -204,7 +204,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
q->queues[i] = &noop_qdisc;
qdisc_tree_reduce_backlog(child, child->q.qlen,
child->qstats.backlog);
- qdisc_destroy(child);
+ qdisc_put(child);
}
}
@@ -228,7 +228,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
qdisc_tree_reduce_backlog(old,
old->q.qlen,
old->qstats.backlog);
- qdisc_destroy(old);
+ qdisc_put(old);
}
sch_tree_unlock(sch);
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index ad18a2052416..57b3ad9394ad 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -412,16 +412,6 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
return segs;
}
-static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb)
-{
- skb->next = qh->head;
-
- if (!qh->head)
- qh->tail = skb;
- qh->head = skb;
- qh->qlen++;
-}
-
/*
* Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length.
@@ -570,7 +560,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
cb->time_to_send = ktime_get_ns();
q->counter = 0;
- netem_enqueue_skb_head(&sch->q, skb);
+ __qdisc_enqueue_head(skb, &sch->q);
sch->qstats.requeues++;
}
@@ -578,7 +568,7 @@ finish_segs:
if (segs) {
while (segs) {
skb2 = segs->next;
- segs->next = NULL;
+ skb_mark_not_on_list(segs);
qdisc_skb_cb(segs)->pkt_len = segs->len;
last_len = segs->len;
rc = qdisc_enqueue(segs, sch, to_free);
@@ -1032,7 +1022,7 @@ static void netem_destroy(struct Qdisc *sch)
qdisc_watchdog_cancel(&q->watchdog);
if (q->qdisc)
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
dist_free(q->delay_dist);
dist_free(q->slot_dist);
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 18d30bb86881..d1429371592f 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -110,8 +110,8 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size)
/* If current delay is less than half of target, and
* if drop prob is low already, disable early_drop
*/
- if ((q->vars.qdelay < q->params.target / 2)
- && (q->vars.prob < MAX_PROB / 5))
+ if ((q->vars.qdelay < q->params.target / 2) &&
+ (q->vars.prob < MAX_PROB / 5))
return false;
/* If we have fewer than 2 mtu-sized packets, disable drop_early,
@@ -209,7 +209,8 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
/* tupdate is in jiffies */
if (tb[TCA_PIE_TUPDATE])
- q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
+ q->params.tupdate =
+ usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
if (tb[TCA_PIE_LIMIT]) {
u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
@@ -247,7 +248,6 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
{
-
struct pie_sched_data *q = qdisc_priv(sch);
int qlen = sch->qstats.backlog; /* current queue size in bytes */
@@ -294,9 +294,9 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
* dq_count to 0 to re-enter the if block when the next
* packet is dequeued
*/
- if (qlen < QUEUE_THRESHOLD)
+ if (qlen < QUEUE_THRESHOLD) {
q->vars.dq_count = DQCOUNT_INVALID;
- else {
+ } else {
q->vars.dq_count = 0;
q->vars.dq_tstamp = psched_get_time();
}
@@ -370,7 +370,7 @@ static void calculate_probability(struct Qdisc *sch)
oldprob = q->vars.prob;
/* to ensure we increase probability in steps of no more than 2% */
- if (delta > (s32) (MAX_PROB / (100 / 2)) &&
+ if (delta > (s32)(MAX_PROB / (100 / 2)) &&
q->vars.prob >= MAX_PROB / 10)
delta = (MAX_PROB / 100) * 2;
@@ -405,7 +405,7 @@ static void calculate_probability(struct Qdisc *sch)
* delay is 0 for 2 consecutive Tupdate periods.
*/
- if ((qdelay == 0) && (qdelay_old == 0) && update_prob)
+ if (qdelay == 0 && qdelay_old == 0 && update_prob)
q->vars.prob = (q->vars.prob * 98) / 100;
q->vars.qdelay = qdelay;
@@ -419,8 +419,8 @@ static void calculate_probability(struct Qdisc *sch)
*/
if ((q->vars.qdelay < q->params.target / 2) &&
(q->vars.qdelay_old < q->params.target / 2) &&
- (q->vars.prob == 0) &&
- (q->vars.avg_dq_rate > 0))
+ q->vars.prob == 0 &&
+ q->vars.avg_dq_rate > 0)
pie_vars_init(&q->vars);
}
@@ -437,7 +437,6 @@ static void pie_timer(struct timer_list *t)
if (q->params.tupdate)
mod_timer(&q->adapt_timer, jiffies + q->params.tupdate);
spin_unlock(root_lock);
-
}
static int pie_init(struct Qdisc *sch, struct nlattr *opt,
@@ -469,15 +468,16 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
struct nlattr *opts;
opts = nla_nest_start(skb, TCA_OPTIONS);
- if (opts == NULL)
+ if (!opts)
goto nla_put_failure;
/* convert target from pschedtime to us */
if (nla_put_u32(skb, TCA_PIE_TARGET,
- ((u32) PSCHED_TICKS2NS(q->params.target)) /
+ ((u32)PSCHED_TICKS2NS(q->params.target)) /
NSEC_PER_USEC) ||
nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
- nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) ||
+ nla_put_u32(skb, TCA_PIE_TUPDATE,
+ jiffies_to_usecs(q->params.tupdate)) ||
nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
@@ -489,7 +489,6 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_failure:
nla_nest_cancel(skb, opts);
return -1;
-
}
static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
@@ -497,7 +496,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
struct pie_sched_data *q = qdisc_priv(sch);
struct tc_pie_xstats st = {
.prob = q->vars.prob,
- .delay = ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) /
+ .delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
NSEC_PER_USEC,
/* unscale and return dq_rate in bytes per sec */
.avg_dq_rate = q->vars.avg_dq_rate *
@@ -514,8 +513,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
{
- struct sk_buff *skb;
- skb = qdisc_dequeue_head(sch);
+ struct sk_buff *skb = qdisc_dequeue_head(sch);
if (!skb)
return NULL;
@@ -527,6 +525,7 @@ static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
static void pie_reset(struct Qdisc *sch)
{
struct pie_sched_data *q = qdisc_priv(sch);
+
qdisc_reset_queue(sch);
pie_vars_init(&q->vars);
}
@@ -534,6 +533,7 @@ static void pie_reset(struct Qdisc *sch)
static void pie_destroy(struct Qdisc *sch)
{
struct pie_sched_data *q = qdisc_priv(sch);
+
q->params.tupdate = 0;
del_timer_sync(&q->adapt_timer);
}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 222e53d3d27a..f8af98621179 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -175,7 +175,7 @@ prio_destroy(struct Qdisc *sch)
tcf_block_put(q->block);
prio_offload(sch, NULL);
for (prio = 0; prio < q->bands; prio++)
- qdisc_destroy(q->queues[prio]);
+ qdisc_put(q->queues[prio]);
}
static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
@@ -205,7 +205,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
extack);
if (!queues[i]) {
while (i > oldbands)
- qdisc_destroy(queues[--i]);
+ qdisc_put(queues[--i]);
return -ENOMEM;
}
}
@@ -220,7 +220,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
qdisc_tree_reduce_backlog(child, child->q.qlen,
child->qstats.backlog);
- qdisc_destroy(child);
+ qdisc_put(child);
}
for (i = oldbands; i < q->bands; i++) {
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index bb1a9c11fc54..dc37c4ead439 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -526,7 +526,7 @@ set_change_agg:
return 0;
destroy_class:
- qdisc_destroy(cl->qdisc);
+ qdisc_put(cl->qdisc);
kfree(cl);
return err;
}
@@ -537,7 +537,7 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
qfq_rm_from_agg(q, cl);
gen_kill_estimator(&cl->rate_est);
- qdisc_destroy(cl->qdisc);
+ qdisc_put(cl->qdisc);
kfree(cl);
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 56c181c3feeb..3ce6c0a2c493 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -181,7 +181,7 @@ static void red_destroy(struct Qdisc *sch)
del_timer_sync(&q->adapt_timer);
red_offload(sch, false);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
}
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
@@ -233,7 +233,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
if (child) {
qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
q->qdisc->qstats.backlog);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
q->qdisc = child;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 7cbdad8419b7..bab506b01a32 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -469,7 +469,7 @@ static void sfb_destroy(struct Qdisc *sch)
struct sfb_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
}
static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = {
@@ -523,7 +523,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
q->qdisc->qstats.backlog);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
q->qdisc = child;
q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
new file mode 100644
index 000000000000..206e4dbed12f
--- /dev/null
+++ b/net/sched/sch_taprio.c
@@ -0,0 +1,962 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* net/sched/sch_taprio.c Time Aware Priority Scheduler
+ *
+ * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <net/sch_generic.h>
+
+#define TAPRIO_ALL_GATES_OPEN -1
+
+struct sched_entry {
+ struct list_head list;
+
+ /* The instant that this entry "closes" and the next one
+ * should open. The qdisc will make some effort so that no
+ * packet leaves after this time.
+ */
+ ktime_t close_time;
+ atomic_t budget;
+ int index;
+ u32 gate_mask;
+ u32 interval;
+ u8 command;
+};
+
+struct taprio_sched {
+ struct Qdisc **qdiscs;
+ struct Qdisc *root;
+ s64 base_time;
+ int clockid;
+ int picos_per_byte; /* Using picoseconds because for 10Gbps+
+ * speeds it's sub-nanoseconds per byte
+ */
+ size_t num_entries;
+
+ /* Protects the update side of the RCU protected current_entry */
+ spinlock_t current_entry_lock;
+ struct sched_entry __rcu *current_entry;
+ struct list_head entries;
+ ktime_t (*get_time)(void);
+ struct hrtimer advance_timer;
+};
+
+static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct Qdisc *child;
+ int queue;
+
+ queue = skb_get_queue_mapping(skb);
+
+ child = q->qdiscs[queue];
+ if (unlikely(!child))
+ return qdisc_drop(skb, sch, to_free);
+
+ qdisc_qstats_backlog_inc(sch, skb);
+ sch->q.qlen++;
+
+ return qdisc_enqueue(skb, child, to_free);
+}
+
+static struct sk_buff *taprio_peek(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct sched_entry *entry;
+ struct sk_buff *skb;
+ u32 gate_mask;
+ int i;
+
+ rcu_read_lock();
+ entry = rcu_dereference(q->current_entry);
+ gate_mask = entry ? entry->gate_mask : -1;
+ rcu_read_unlock();
+
+ if (!gate_mask)
+ return NULL;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct Qdisc *child = q->qdiscs[i];
+ int prio;
+ u8 tc;
+
+ if (unlikely(!child))
+ continue;
+
+ skb = child->ops->peek(child);
+ if (!skb)
+ continue;
+
+ prio = skb->priority;
+ tc = netdev_get_prio_tc_map(dev, prio);
+
+ if (!(gate_mask & BIT(tc)))
+ return NULL;
+
+ return skb;
+ }
+
+ return NULL;
+}
+
+static inline int length_to_duration(struct taprio_sched *q, int len)
+{
+ return (len * q->picos_per_byte) / 1000;
+}
+
+static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct sched_entry *entry;
+ struct sk_buff *skb;
+ u32 gate_mask;
+ int i;
+
+ rcu_read_lock();
+ entry = rcu_dereference(q->current_entry);
+ /* if there's no entry, it means that the schedule didn't
+ * start yet, so force all gates to be open; this is in
+ * accordance with IEEE 802.1Qbv-2015 Section 8.6.9.4.5
+ * "AdminGateStates"
+ */
+ gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
+ rcu_read_unlock();
+
+ if (!gate_mask)
+ return NULL;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct Qdisc *child = q->qdiscs[i];
+ ktime_t guard;
+ int prio;
+ int len;
+ u8 tc;
+
+ if (unlikely(!child))
+ continue;
+
+ skb = child->ops->peek(child);
+ if (!skb)
+ continue;
+
+ prio = skb->priority;
+ tc = netdev_get_prio_tc_map(dev, prio);
+
+ if (!(gate_mask & BIT(tc)))
+ continue;
+
+ len = qdisc_pkt_len(skb);
+ guard = ktime_add_ns(q->get_time(),
+ length_to_duration(q, len));
+
+ /* In the case that there's no gate entry, there's no
+ * guard band ...
+ */
+ if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
+ ktime_after(guard, entry->close_time))
+ return NULL;
+
+ /* ... and no budget. */
+ if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
+ atomic_sub_return(len, &entry->budget) < 0)
+ return NULL;
+
+ skb = child->ops->dequeue(child);
+ if (unlikely(!skb))
+ return NULL;
+
+ qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+ }
+
+ return NULL;
+}
+
+static bool should_restart_cycle(const struct taprio_sched *q,
+ const struct sched_entry *entry)
+{
+ WARN_ON(!entry);
+
+ return list_is_last(&entry->list, &q->entries);
+}
+
+static enum hrtimer_restart advance_sched(struct hrtimer *timer)
+{
+ struct taprio_sched *q = container_of(timer, struct taprio_sched,
+ advance_timer);
+ struct sched_entry *entry, *next;
+ struct Qdisc *sch = q->root;
+ ktime_t close_time;
+
+ spin_lock(&q->current_entry_lock);
+ entry = rcu_dereference_protected(q->current_entry,
+ lockdep_is_held(&q->current_entry_lock));
+
+ /* This is the first time that the schedule runs, which
+ * only happens once per schedule. The first entry is
+ * pre-calculated during the schedule initialization.
+ */
+ if (unlikely(!entry)) {
+ next = list_first_entry(&q->entries, struct sched_entry,
+ list);
+ close_time = next->close_time;
+ goto first_run;
+ }
+
+ if (should_restart_cycle(q, entry))
+ next = list_first_entry(&q->entries, struct sched_entry,
+ list);
+ else
+ next = list_next_entry(entry, list);
+
+ close_time = ktime_add_ns(entry->close_time, next->interval);
+
+ next->close_time = close_time;
+ atomic_set(&next->budget,
+ (next->interval * 1000) / q->picos_per_byte);
+
+first_run:
+ rcu_assign_pointer(q->current_entry, next);
+ spin_unlock(&q->current_entry_lock);
+
+ hrtimer_set_expires(&q->advance_timer, close_time);
+
+ rcu_read_lock();
+ __netif_schedule(sch);
+ rcu_read_unlock();
+
+ return HRTIMER_RESTART;
+}
+
+static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
+ [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 },
+ [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 },
+ [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
+ [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = {
+ [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
+ [TCA_TAPRIO_ATTR_PRIOMAP] = {
+ .len = sizeof(struct tc_mqprio_qopt)
+ },
+ [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED },
+ [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
+ [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
+};
+
+static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
+ struct netlink_ext_ack *extack)
+{
+ u32 interval = 0;
+
+ if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
+ entry->command = nla_get_u8(
+ tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
+
+ if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
+ entry->gate_mask = nla_get_u32(
+ tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
+
+ if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
+ interval = nla_get_u32(
+ tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
+
+ if (interval == 0) {
+ NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
+ return -EINVAL;
+ }
+
+ entry->interval = interval;
+
+ return 0;
+}
+
+static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
+ int index, struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
+ int err;
+
+ err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
+ entry_policy, NULL);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Could not parse nested entry");
+ return -EINVAL;
+ }
+
+ entry->index = index;
+
+ return fill_sched_entry(tb, entry, extack);
+}
+
+/* Returns the number of entries in case of success */
+static int parse_sched_single_entry(struct nlattr *n,
+ struct taprio_sched *q,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
+ struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { };
+ struct sched_entry *entry;
+ bool found = false;
+ u32 index;
+ int err;
+
+ err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX,
+ n, entry_list_policy, NULL);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Could not parse nested entry");
+ return -EINVAL;
+ }
+
+ if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) {
+ NL_SET_ERR_MSG(extack, "Single-entry must include an entry");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX,
+ tb_list[TCA_TAPRIO_SCHED_ENTRY],
+ entry_policy, NULL);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Could not parse nested entry");
+ return -EINVAL;
+ }
+
+ if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) {
+ NL_SET_ERR_MSG(extack, "Entry must specify an index\n");
+ return -EINVAL;
+ }
+
+ index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]);
+ if (index >= q->num_entries) {
+ NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule");
+ return -EINVAL;
+ }
+
+ list_for_each_entry(entry, &q->entries, list) {
+ if (entry->index == index) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ NL_SET_ERR_MSG(extack, "Could not find entry");
+ return -ENOENT;
+ }
+
+ err = fill_sched_entry(tb_entry, entry, extack);
+ if (err < 0)
+ return err;
+
+ return q->num_entries;
+}
+
+static int parse_sched_list(struct nlattr *list,
+ struct taprio_sched *q,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *n;
+ int err, rem;
+ int i = 0;
+
+ if (!list)
+ return -EINVAL;
+
+ nla_for_each_nested(n, list, rem) {
+ struct sched_entry *entry;
+
+ if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
+ NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
+ continue;
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ NL_SET_ERR_MSG(extack, "Not enough memory for entry");
+ return -ENOMEM;
+ }
+
+ err = parse_sched_entry(n, entry, i, extack);
+ if (err < 0) {
+ kfree(entry);
+ return err;
+ }
+
+ list_add_tail(&entry->list, &q->entries);
+ i++;
+ }
+
+ q->num_entries = i;
+
+ return i;
+}
+
+/* Returns the number of entries in case of success */
+static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q,
+ struct netlink_ext_ack *extack)
+{
+ int err = 0;
+ int clockid;
+
+ if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] &&
+ tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
+ return -EINVAL;
+
+ if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0)
+ return -EINVAL;
+
+ if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID])
+ return -EINVAL;
+
+ if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
+ q->base_time = nla_get_s64(
+ tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
+
+ if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+ clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
+
+ /* We only support static clockids and we don't allow
+ * for it to be modified after the first init.
+ */
+ if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid))
+ return -EINVAL;
+
+ q->clockid = clockid;
+ }
+
+ if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
+ err = parse_sched_list(
+ tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack);
+ else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
+ err = parse_sched_single_entry(
+ tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack);
+
+ /* parse_sched_* return the number of entries in the schedule;
+ * a schedule with zero entries is an error.
+ */
+ if (err == 0) {
+ NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry");
+ return -EINVAL;
+ }
+
+ return err;
+}
+
+static int taprio_parse_mqprio_opt(struct net_device *dev,
+ struct tc_mqprio_qopt *qopt,
+ struct netlink_ext_ack *extack)
+{
+ int i, j;
+
+ if (!qopt) {
+ NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
+ return -EINVAL;
+ }
+
+ /* Verify num_tc is not out of max range */
+ if (qopt->num_tc > TC_MAX_QUEUE) {
+ NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
+ return -EINVAL;
+ }
+
+ /* taprio imposes that traffic classes map 1:n to tx queues */
+ if (qopt->num_tc > dev->num_tx_queues) {
+ NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
+ return -EINVAL;
+ }
+
+ /* Verify priority mapping uses valid tcs */
+ for (i = 0; i < TC_BITMASK + 1; i++) {
+ if (qopt->prio_tc_map[i] >= qopt->num_tc) {
+ NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
+ return -EINVAL;
+ }
+ }
+
+ for (i = 0; i < qopt->num_tc; i++) {
+ unsigned int last = qopt->offset[i] + qopt->count[i];
+
+ /* Verify the queue count is in tx range; being equal to the
+ * real_num_tx_queues indicates the last queue is in use.
+ */
+ if (qopt->offset[i] >= dev->num_tx_queues ||
+ !qopt->count[i] ||
+ last > dev->real_num_tx_queues) {
+ NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
+ return -EINVAL;
+ }
+
+ /* Verify that the offset and counts do not overlap */
+ for (j = i + 1; j < qopt->num_tc; j++) {
+ if (last > qopt->offset[j]) {
+ NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static ktime_t taprio_get_start_time(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct sched_entry *entry;
+ ktime_t now, base, cycle;
+ s64 n;
+
+ base = ns_to_ktime(q->base_time);
+ cycle = 0;
+
+ /* Calculate the cycle_time, by summing all the intervals.
+ */
+ list_for_each_entry(entry, &q->entries, list)
+ cycle = ktime_add_ns(cycle, entry->interval);
+
+ if (!cycle)
+ return base;
+
+ now = q->get_time();
+
+ if (ktime_after(base, now))
+ return base;
+
+ /* Schedule the start time for the beginning of the next
+ * cycle.
+ */
+ n = div64_s64(ktime_sub_ns(now, base), cycle);
+
+ return ktime_add_ns(base, (n + 1) * cycle);
+}
+
+static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct sched_entry *first;
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->current_entry_lock, flags);
+
+ first = list_first_entry(&q->entries, struct sched_entry,
+ list);
+
+ first->close_time = ktime_add_ns(start, first->interval);
+ atomic_set(&first->budget,
+ (first->interval * 1000) / q->picos_per_byte);
+ rcu_assign_pointer(q->current_entry, NULL);
+
+ spin_unlock_irqrestore(&q->current_entry_lock, flags);
+
+ hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
+}
+
+static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_mqprio_qopt *mqprio = NULL;
+ struct ethtool_link_ksettings ecmd;
+ int i, err, size;
+ s64 link_speed;
+ ktime_t start;
+
+ err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt,
+ taprio_policy, extack);
+ if (err < 0)
+ return err;
+
+ err = -EINVAL;
+ if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
+ mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
+
+ err = taprio_parse_mqprio_opt(dev, mqprio, extack);
+ if (err < 0)
+ return err;
+
+ /* A schedule with less than one entry is an error */
+ size = parse_taprio_opt(tb, q, extack);
+ if (size < 0)
+ return size;
+
+ hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
+ q->advance_timer.function = advance_sched;
+
+ switch (q->clockid) {
+ case CLOCK_REALTIME:
+ q->get_time = ktime_get_real;
+ break;
+ case CLOCK_MONOTONIC:
+ q->get_time = ktime_get;
+ break;
+ case CLOCK_BOOTTIME:
+ q->get_time = ktime_get_boottime;
+ break;
+ case CLOCK_TAI:
+ q->get_time = ktime_get_clocktai;
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *dev_queue;
+ struct Qdisc *qdisc;
+
+ dev_queue = netdev_get_tx_queue(dev, i);
+ qdisc = qdisc_create_dflt(dev_queue,
+ &pfifo_qdisc_ops,
+ TC_H_MAKE(TC_H_MAJ(sch->handle),
+ TC_H_MIN(i + 1)),
+ extack);
+ if (!qdisc)
+ return -ENOMEM;
+
+ if (i < dev->real_num_tx_queues)
+ qdisc_hash_add(qdisc, false);
+
+ q->qdiscs[i] = qdisc;
+ }
+
+ if (mqprio) {
+ netdev_set_num_tc(dev, mqprio->num_tc);
+ for (i = 0; i < mqprio->num_tc; i++)
+ netdev_set_tc_queue(dev, i,
+ mqprio->count[i],
+ mqprio->offset[i]);
+
+ /* Always use supplied priority mappings */
+ for (i = 0; i < TC_BITMASK + 1; i++)
+ netdev_set_prio_tc_map(dev, i,
+ mqprio->prio_tc_map[i]);
+ }
+
+ if (!__ethtool_get_link_ksettings(dev, &ecmd))
+ link_speed = ecmd.base.speed;
+ else
+ link_speed = SPEED_1000;
+
+ q->picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
+ link_speed * 1000 * 1000);
+
+ start = taprio_get_start_time(sch);
+ if (!start)
+ return 0;
+
+ taprio_start_sched(sch, start);
+
+ return 0;
+}
+
+static void taprio_destroy(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct sched_entry *entry, *n;
+ unsigned int i;
+
+ hrtimer_cancel(&q->advance_timer);
+
+ if (q->qdiscs) {
+ for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
+ qdisc_put(q->qdiscs[i]);
+
+ kfree(q->qdiscs);
+ }
+ q->qdiscs = NULL;
+
+ netdev_set_num_tc(dev, 0);
+
+ list_for_each_entry_safe(entry, n, &q->entries, list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+}
+
+static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+
+ INIT_LIST_HEAD(&q->entries);
+ spin_lock_init(&q->current_entry_lock);
+
+ /* We may overwrite the configuration later */
+ hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
+
+ q->root = sch;
+
+ /* We only support static clockids. Use an invalid value as default
+ * and get the valid one on taprio_change().
+ */
+ q->clockid = -1;
+
+ if (sch->parent != TC_H_ROOT)
+ return -EOPNOTSUPP;
+
+ if (!netif_is_multiqueue(dev))
+ return -EOPNOTSUPP;
+
+ /* pre-allocate qdisc, attachment can't fail */
+ q->qdiscs = kcalloc(dev->num_tx_queues,
+ sizeof(q->qdiscs[0]),
+ GFP_KERNEL);
+
+ if (!q->qdiscs)
+ return -ENOMEM;
+
+ if (!opt)
+ return -EINVAL;
+
+ return taprio_change(sch, opt, extack);
+}
+
+static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
+ unsigned long cl)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned long ntx = cl - 1;
+
+ if (ntx >= dev->num_tx_queues)
+ return NULL;
+
+ return netdev_get_tx_queue(dev, ntx);
+}
+
+static int taprio_graft(struct Qdisc *sch, unsigned long cl,
+ struct Qdisc *new, struct Qdisc **old,
+ struct netlink_ext_ack *extack)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+ if (!dev_queue)
+ return -EINVAL;
+
+ if (dev->flags & IFF_UP)
+ dev_deactivate(dev);
+
+ *old = q->qdiscs[cl - 1];
+ q->qdiscs[cl - 1] = new;
+
+ if (new)
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+
+ if (dev->flags & IFF_UP)
+ dev_activate(dev);
+
+ return 0;
+}
+
+/* Emit one schedule entry as a nested TCA_TAPRIO_SCHED_ENTRY attribute
+ * carrying its index, command, gate mask and interval.
+ * Returns the result of nla_nest_end() on success, -ENOSPC if the nest
+ * cannot be opened, or -1 (after cancelling the nest) if any field fails.
+ */
+static int dump_entry(struct sk_buff *msg,
+		      const struct sched_entry *entry)
+{
+	struct nlattr *nest = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY);
+
+	if (!nest)
+		return -ENOSPC;
+
+	/* Short-circuit keeps the attributes in their original order */
+	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index) ||
+	    nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command) ||
+	    nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
+			entry->gate_mask) ||
+	    nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
+			entry->interval)) {
+		nla_nest_cancel(msg, nest);
+		return -1;
+	}
+
+	return nla_nest_end(msg, nest);
+}
+
+/* Dump the qdisc configuration into @skb under TCA_OPTIONS: the priority
+ * map (built from the device's traffic-class state), base time, clockid
+ * and the list of schedule entries.
+ * Returns the result of nla_nest_end() on success, -ENOSPC if the outer
+ * nest cannot be opened, or -1 after cancelling the outer nest on any
+ * later failure.
+ */
+static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct taprio_sched *priv = qdisc_priv(sch);
+	struct net_device *ndev = qdisc_dev(sch);
+	struct tc_mqprio_qopt qopt = { 0 };
+	struct nlattr *opts, *sched_list;
+	struct sched_entry *ent;
+	unsigned int tc;
+
+	qopt.num_tc = netdev_get_num_tc(ndev);
+	memcpy(qopt.prio_tc_map, ndev->prio_tc_map, sizeof(qopt.prio_tc_map));
+
+	for (tc = 0; tc < netdev_get_num_tc(ndev); tc++) {
+		qopt.count[tc] = ndev->tc_to_txq[tc].count;
+		qopt.offset[tc] = ndev->tc_to_txq[tc].offset;
+	}
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (!opts)
+		return -ENOSPC;
+
+	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(qopt), &qopt) ||
+	    nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
+			priv->base_time, TCA_TAPRIO_PAD) ||
+	    nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, priv->clockid))
+		goto cancel;
+
+	sched_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
+	if (!sched_list)
+		goto cancel;
+
+	list_for_each_entry(ent, &priv->entries, list) {
+		if (dump_entry(skb, ent) < 0)
+			goto cancel;
+	}
+
+	nla_nest_end(skb, sched_list);
+
+	return nla_nest_end(skb, opts);
+
+cancel:
+	/* Cancelling the outer nest also discards anything nested in it */
+	nla_nest_cancel(skb, opts);
+	return -1;
+}
+
+/* Return the qdisc_sleeping of the TX queue that class @cl maps to, or
+ * NULL when @cl does not name a queue.
+ */
+static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
+{
+	struct netdev_queue *txq = taprio_queue_get(sch, cl);
+
+	return txq ? txq->qdisc_sleeping : NULL;
+}
+
+/* Resolve @classid to an internal class handle: the minor part of the id
+ * when it maps to a TX queue, 0 otherwise.
+ */
+static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
+{
+	unsigned int minor = TC_H_MIN(classid);
+
+	return taprio_queue_get(sch, minor) ? minor : 0;
+}
+
+/* Fill @tcm for class @cl: parent is the root, the minor of @cl is OR-ed
+ * into the handle, and tcm_info carries the handle of the queue's
+ * qdisc_sleeping.  @cl is not validated here; it is dereferenced as-is.
+ */
+static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			     struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct netdev_queue *txq = taprio_queue_get(sch, cl);
+	struct Qdisc *child;
+
+	tcm->tcm_parent = TC_H_ROOT;
+	tcm->tcm_handle |= TC_H_MIN(cl);
+
+	child = txq->qdisc_sleeping;
+	tcm->tcm_info = child->handle;
+
+	return 0;
+}
+
+/* Copy the basic and queue statistics of the qdisc_sleeping behind class
+ * @cl into @d.  Returns 0 on success, -1 if either copy fails.
+ */
+static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				   struct gnet_dump *d)
+	__releases(d->lock)
+	__acquires(d->lock)
+{
+	struct Qdisc *child = taprio_queue_get(sch, cl)->qdisc_sleeping;
+
+	if (gnet_stats_copy_basic(&child->running, d, NULL, &child->bstats) < 0)
+		return -1;
+
+	if (gnet_stats_copy_queue(d, NULL, &child->qstats, child->q.qlen) < 0)
+		return -1;
+
+	return 0;
+}
+
+/* Invoke arg->fn for every class (one per TX queue, 1-based ids), starting
+ * after the first arg->skip classes.  Stops and sets arg->stop as soon as
+ * the callback returns a negative value; arg->count tracks progress.
+ */
+static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct net_device *ndev = qdisc_dev(sch);
+	unsigned long idx;
+
+	if (arg->stop)
+		return;
+
+	arg->count = arg->skip;
+	idx = arg->skip;
+
+	while (idx < ndev->num_tx_queues) {
+		if (arg->fn(sch, idx + 1, arg) < 0) {
+			arg->stop = 1;
+			return;
+		}
+		arg->count++;
+		idx++;
+	}
+}
+
+/* Pick the TX queue addressed by the minor part of tcm->tcm_parent, or
+ * NULL if that does not name a queue.
+ */
+static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
+						struct tcmsg *tcm)
+{
+	u32 parent_minor = TC_H_MIN(tcm->tcm_parent);
+
+	return taprio_queue_get(sch, parent_minor);
+}
+
+static const struct Qdisc_class_ops taprio_class_ops = { /* class callbacks; referenced by taprio_qdisc_ops.cl_ops */
+ .graft = taprio_graft,
+ .leaf = taprio_leaf,
+ .find = taprio_find,
+ .walk = taprio_walk,
+ .dump = taprio_dump_class,
+ .dump_stats = taprio_dump_class_stats,
+ .select_queue = taprio_select_queue,
+};
+
+static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { /* ops table handed to register_qdisc()/unregister_qdisc() below */
+ .cl_ops = &taprio_class_ops,
+ .id = "taprio",
+ .priv_size = sizeof(struct taprio_sched), /* size of the per-qdisc private area read via qdisc_priv() */
+ .init = taprio_init,
+ .destroy = taprio_destroy,
+ .peek = taprio_peek,
+ .dequeue = taprio_dequeue,
+ .enqueue = taprio_enqueue,
+ .dump = taprio_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init taprio_module_init(void) /* module entry: register the "taprio" qdisc ops */
+{
+ return register_qdisc(&taprio_qdisc_ops); /* result is passed straight back to the module loader */
+}
+
+static void __exit taprio_module_exit(void) /* module exit: unregister the same ops table */
+{
+ unregister_qdisc(&taprio_qdisc_ops);
+}
+
+module_init(taprio_module_init);
+module_exit(taprio_module_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 6f74a426f159..942dcca09cf2 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -162,7 +162,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
nb = 0;
while (segs) {
nskb = segs->next;
- segs->next = NULL;
+ skb_mark_not_on_list(segs);
qdisc_skb_cb(segs)->pkt_len = segs->len;
len += segs->len;
ret = qdisc_enqueue(segs, q->qdisc, to_free);
@@ -392,7 +392,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
if (child) {
qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
q->qdisc->qstats.backlog);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
q->qdisc = child;
}
q->limit = qopt->limit;
@@ -438,7 +438,7 @@ static void tbf_destroy(struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_watchdog_cancel(&q->watchdog);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
}
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 0b427100b0d4..331cc734e3db 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -459,7 +459,7 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul
* element in the queue, then count it towards
* possible PD.
*/
- if (pos == ulpq->reasm.next) {
+ if (skb_queue_is_first(&ulpq->reasm, pos)) {
pd_first = pos;
pd_last = pos;
pd_len = pos->len;
diff --git a/net/socket.c b/net/socket.c
index 01f3f8f32d6f..713dc4833d40 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1475,7 +1475,7 @@ int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
err = move_addr_to_kernel(umyaddr, addrlen, &address);
- if (err >= 0) {
+ if (!err) {
err = security_socket_bind(sock,
(struct sockaddr *)&address,
addrlen);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 645c16052052..e65c3a8551e4 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -577,7 +577,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
rcu_dereference_rtnl(orig_dev->tipc_ptr);
if (likely(b && test_bit(0, &b->up) &&
(skb->pkt_type <= PACKET_MULTICAST))) {
- skb->next = NULL;
+ skb_mark_not_on_list(skb);
tipc_rcv(dev_net(b->pt.dev), skb, b);
rcu_read_unlock();
return NET_RX_SUCCESS;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index b61891054709..f48e5857210f 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -499,54 +499,56 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
/**
* tipc_msg_reverse(): swap source and destination addresses and add error code
* @own_node: originating node id for reversed message
- * @skb: buffer containing message to be reversed; may be replaced.
+ * @skb: buffer containing message to be reversed; will be consumed
* @err: error code to be set in message, if any
- * Consumes buffer at failure
+ * Replaces consumed buffer with new one when successful
* Returns true if success, otherwise false
*/
bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
{
struct sk_buff *_skb = *skb;
- struct tipc_msg *hdr;
- struct tipc_msg ohdr;
- int dlen;
+ struct tipc_msg *_hdr, *hdr;
+ int hlen, dlen;
if (skb_linearize(_skb))
goto exit;
- hdr = buf_msg(_skb);
- dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE);
- if (msg_dest_droppable(hdr))
+ _hdr = buf_msg(_skb);
+ dlen = min_t(uint, msg_data_sz(_hdr), MAX_FORWARD_SIZE);
+ hlen = msg_hdr_sz(_hdr);
+
+ if (msg_dest_droppable(_hdr))
goto exit;
- if (msg_errcode(hdr))
+ if (msg_errcode(_hdr))
goto exit;
- /* Take a copy of original header before altering message */
- memcpy(&ohdr, hdr, msg_hdr_sz(hdr));
-
- /* Never return SHORT header; expand by replacing buffer if necessary */
- if (msg_short(hdr)) {
- *skb = tipc_buf_acquire(BASIC_H_SIZE + dlen, GFP_ATOMIC);
- if (!*skb)
- goto exit;
- memcpy((*skb)->data + BASIC_H_SIZE, msg_data(hdr), dlen);
- kfree_skb(_skb);
- _skb = *skb;
- hdr = buf_msg(_skb);
- memcpy(hdr, &ohdr, BASIC_H_SIZE);
- msg_set_hdr_sz(hdr, BASIC_H_SIZE);
- }
+ /* Never return SHORT header */
+ if (hlen == SHORT_H_SIZE)
+ hlen = BASIC_H_SIZE;
+
+ /* Don't return data along with SYN+, - sender has a clone */
+ if (msg_is_syn(_hdr) && err == TIPC_ERR_OVERLOAD)
+ dlen = 0;
+
+ /* Allocate new buffer to return */
+ *skb = tipc_buf_acquire(hlen + dlen, GFP_ATOMIC);
+ if (!*skb)
+ goto exit;
+ memcpy((*skb)->data, _skb->data, msg_hdr_sz(_hdr));
+ memcpy((*skb)->data + hlen, msg_data(_hdr), dlen);
- /* Now reverse the concerned fields */
+ /* Build reverse header in new buffer */
+ hdr = buf_msg(*skb);
+ msg_set_hdr_sz(hdr, hlen);
msg_set_errcode(hdr, err);
msg_set_non_seq(hdr, 0);
- msg_set_origport(hdr, msg_destport(&ohdr));
- msg_set_destport(hdr, msg_origport(&ohdr));
- msg_set_destnode(hdr, msg_prevnode(&ohdr));
+ msg_set_origport(hdr, msg_destport(_hdr));
+ msg_set_destport(hdr, msg_origport(_hdr));
+ msg_set_destnode(hdr, msg_prevnode(_hdr));
msg_set_prevnode(hdr, own_node);
msg_set_orignode(hdr, own_node);
- msg_set_size(hdr, msg_hdr_sz(hdr) + dlen);
- skb_trim(_skb, msg_size(hdr));
+ msg_set_size(hdr, hlen + dlen);
skb_orphan(_skb);
+ kfree_skb(_skb);
return true;
exit:
kfree_skb(_skb);
@@ -554,6 +556,22 @@ exit:
return false;
}
+/* Append a clone of every buffer in @msg to the tail of @cpy.
+ * Returns true on success.  If any clone fails, the whole of @cpy is
+ * purged (including buffers it held before the call), an error is
+ * logged, and false is returned.  @msg itself is left untouched.
+ */
+bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy)
+{
+	struct sk_buff *orig, *dup;
+
+	skb_queue_walk(msg, orig) {
+		dup = skb_clone(orig, GFP_ATOMIC);
+		if (!dup)
+			goto fail;
+		__skb_queue_tail(cpy, dup);
+	}
+	return true;
+
+fail:
+	__skb_queue_purge(cpy);
+	pr_err_ratelimited("Failed to clone buffer chain\n");
+	return false;
+}
+
/**
* tipc_msg_lookup_dest(): try to find new destination for named message
* @skb: the buffer containing the message.
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index a4e944d59394..a2879e6ec5b6 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -216,6 +216,16 @@ static inline void msg_set_non_seq(struct tipc_msg *m, u32 n)
msg_set_bits(m, 0, 20, 1, n);
}
+static inline int msg_is_syn(struct tipc_msg *m) /* getter for the SYN flag written by msg_set_syn() */
+{
+ return msg_bits(m, 0, 17, 1); /* presumably word 0, bit 17, 1-bit mask - argument meaning is defined by msg_bits(), confirm there */
+}
+
+static inline void msg_set_syn(struct tipc_msg *m, u32 d) /* setter for the SYN flag read by msg_is_syn() */
+{
+ msg_set_bits(m, 0, 17, 1, d); /* same (0, 17, 1) field coordinates as the getter; d is the new value */
+}
+
static inline int msg_dest_droppable(struct tipc_msg *m)
{
return msg_bits(m, 0, 19, 1);
@@ -970,6 +980,7 @@ bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
struct sk_buff_head *cpy);
void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
struct sk_buff *skb);
+bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy);
static inline u16 buf_seqno(struct sk_buff *skb)
{
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 48b3298a248d..03f5efb62cfb 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -45,6 +45,7 @@
/* Optional capabilities supported by this code version
*/
enum {
+ TIPC_SYN_BIT = (1),
TIPC_BCAST_SYNCH = (1 << 1),
TIPC_BCAST_STATE_NACK = (1 << 2),
TIPC_BLOCK_FLOWCTL = (1 << 3),
@@ -53,11 +54,12 @@ enum {
TIPC_LINK_PROTO_SEQNO = (1 << 6)
};
-#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
- TIPC_BCAST_STATE_NACK | \
- TIPC_BCAST_RCAST | \
- TIPC_BLOCK_FLOWCTL | \
- TIPC_NODE_ID128 | \
+#define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \
+ TIPC_BCAST_SYNCH | \
+ TIPC_BCAST_STATE_NACK | \
+ TIPC_BCAST_RCAST | \
+ TIPC_BLOCK_FLOWCTL | \
+ TIPC_NODE_ID128 | \
TIPC_LINK_PROTO_SEQNO)
#define INVALID_BEARER_ID -1
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 49810fdff4c5..de09f514428c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -47,7 +47,7 @@
#include "netlink.h"
#include "group.h"
-#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
+#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
#define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff
@@ -80,7 +80,6 @@ struct sockaddr_pair {
* @publications: list of publications for port
* @blocking_link: address of the congested link we are currently sleeping on
* @pub_count: total # of publications port has made during its lifetime
- * @probing_state:
* @conn_timeout: the time we can wait for an unresponded setup request
* @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
* @cong_link_cnt: number of congested links
@@ -102,8 +101,8 @@ struct tipc_sock {
struct list_head cong_links;
struct list_head publications;
u32 pub_count;
- uint conn_timeout;
atomic_t dupl_rcvcnt;
+ u16 conn_timeout;
bool probe_unacked;
u16 cong_link_cnt;
u16 snt_unacked;
@@ -507,6 +506,9 @@ static void __tipc_shutdown(struct socket *sock, int error)
tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
!tsk_conn_cong(tsk)));
+ /* Remove any pending SYN message */
+ __skb_queue_purge(&sk->sk_write_queue);
+
/* Reject all unreceived messages, except on an active connection
* (which disconnects locally & sends a 'FIN+' to peer).
*/
@@ -1329,6 +1331,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
tsk->conn_type = dest->addr.name.name.type;
tsk->conn_instance = dest->addr.name.name.instance;
}
+ msg_set_syn(hdr, 1);
}
seq = &dest->addr.nameseq;
@@ -1371,6 +1374,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
if (unlikely(rc != dlen))
return rc;
+ if (unlikely(syn && !tipc_msg_skb_clone(&pkts, &sk->sk_write_queue)))
+ return -ENOMEM;
rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
if (unlikely(rc == -ELINKCONG)) {
@@ -1490,6 +1495,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
struct net *net = sock_net(sk);
struct tipc_msg *msg = &tsk->phdr;
+ msg_set_syn(msg, 0);
msg_set_destnode(msg, peer_node);
msg_set_destport(msg, peer_port);
msg_set_type(msg, TIPC_CONN_MSG);
@@ -1501,6 +1507,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
+ __skb_queue_purge(&sk->sk_write_queue);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
return;
@@ -1971,91 +1978,90 @@ static void tipc_sk_proto_rcv(struct sock *sk,
}
/**
- * tipc_filter_connect - Handle incoming message for a connection-based socket
+ * tipc_sk_filter_connect - check incoming message for a connection-based socket
* @tsk: TIPC socket
- * @skb: pointer to message buffer. Set to NULL if buffer is consumed
- *
- * Returns true if everything ok, false otherwise
+ * @skb: pointer to message buffer.
+ * Returns true if message should be added to receive queue, false otherwise
*/
static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
struct tipc_msg *hdr = buf_msg(skb);
- u32 pport = msg_origport(hdr);
- u32 pnode = msg_orignode(hdr);
+ bool con_msg = msg_connected(hdr);
+ u32 pport = tsk_peer_port(tsk);
+ u32 pnode = tsk_peer_node(tsk);
+ u32 oport = msg_origport(hdr);
+ u32 onode = msg_orignode(hdr);
+ int err = msg_errcode(hdr);
+ unsigned long delay;
+ u16 rnd;
if (unlikely(msg_mcast(hdr)))
return false;
switch (sk->sk_state) {
case TIPC_CONNECTING:
- /* Accept only ACK or NACK message */
- if (unlikely(!msg_connected(hdr))) {
- if (pport != tsk_peer_port(tsk) ||
- pnode != tsk_peer_node(tsk))
- return false;
-
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- sk->sk_err = ECONNREFUSED;
- sk->sk_state_change(sk);
- return true;
- }
-
- if (unlikely(msg_errcode(hdr))) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- sk->sk_err = ECONNREFUSED;
- sk->sk_state_change(sk);
- return true;
- }
-
- if (unlikely(!msg_isdata(hdr))) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- sk->sk_err = EINVAL;
- sk->sk_state_change(sk);
- return true;
+ /* Setup ACK */
+ if (likely(con_msg)) {
+ if (err)
+ break;
+ tipc_sk_finish_conn(tsk, oport, onode);
+ msg_set_importance(&tsk->phdr, msg_importance(hdr));
+ /* ACK+ message with data is added to receive queue */
+ if (msg_data_sz(hdr))
+ return true;
+ /* Empty ACK-, - wake up sleeping connect() and drop */
+ sk->sk_data_ready(sk);
+ msg_set_dest_droppable(hdr, 1);
+ return false;
}
+ /* Ignore connectionless message if not from listening socket */
+ if (oport != pport || onode != pnode)
+ return false;
- tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
- msg_set_importance(&tsk->phdr, msg_importance(hdr));
-
- /* If 'ACK+' message, add to socket receive queue */
- if (msg_data_sz(hdr))
- return true;
-
- /* If empty 'ACK-' message, wake up sleeping connect() */
- sk->sk_data_ready(sk);
+ /* Rejected SYN */
+ if (err != TIPC_ERR_OVERLOAD)
+ break;
- /* 'ACK-' message is neither accepted nor rejected: */
- msg_set_dest_droppable(hdr, 1);
+ /* Prepare for new setup attempt if we have a SYN clone */
+ if (skb_queue_empty(&sk->sk_write_queue))
+ break;
+ /* Random back-off: 100 ms plus up to a quarter of conn_timeout.
+ * Draw exactly 16 random bits into a fully initialised value,
+ * and keep the modulus non-zero in case conn_timeout is below
+ * 4 ms (it would otherwise divide by zero).
+ */
+ get_random_bytes(&rnd, sizeof(rnd));
+ delay = rnd % max_t(u32, tsk->conn_timeout / 4, 1);
+ delay = msecs_to_jiffies(delay + 100);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + delay);
return false;
-
case TIPC_OPEN:
case TIPC_DISCONNECTING:
- break;
+ return false;
case TIPC_LISTEN:
/* Accept only SYN message */
- if (!msg_connected(hdr) && !(msg_errcode(hdr)))
+ if (!msg_is_syn(hdr) &&
+ tipc_node_get_capabilities(net, onode) & TIPC_SYN_BIT)
+ return false;
+ if (!con_msg && !err)
return true;
- break;
+ return false;
case TIPC_ESTABLISHED:
/* Accept only connection-based messages sent by peer */
- if (unlikely(!tsk_peer_msg(tsk, hdr)))
+ if (likely(con_msg && !err && pport == oport && pnode == onode))
+ return true;
+ if (!tsk_peer_msg(tsk, hdr))
return false;
-
- if (unlikely(msg_errcode(hdr))) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- /* Let timer expire on it's own */
- tipc_node_remove_conn(net, tsk_peer_node(tsk),
- tsk->portid);
- sk->sk_state_change(sk);
- }
+ if (!err)
+ return true;
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ tipc_node_remove_conn(net, pnode, tsk->portid);
+ sk->sk_state_change(sk);
return true;
default:
pr_err("Unknown sk_state %u\n", sk->sk_state);
}
-
- return false;
+ /* Abort connection setup attempt */
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ sk->sk_err = ECONNREFUSED;
+ sk->sk_state_change(sk);
+ return true;
}
/**
@@ -2557,43 +2563,78 @@ static int tipc_shutdown(struct socket *sock, int how)
return res;
}
+/* Timer work for an established connection.  If probe_unacked is still
+ * set from the previous round, the socket is moved to
+ * TIPC_DISCONNECTING with ECONNABORTED.  Otherwise a new CONN_PROBE is
+ * queued on @list for the caller to send, probe_unacked is set, and the
+ * timer is re-armed for CONN_PROBING_INTV.
+ */
+static void tipc_sk_check_probing_state(struct sock *sk,
+					struct sk_buff_head *list)
+{
+	struct tipc_sock *tsk = tipc_sk(sk);
+	u32 peer_node = tsk_peer_node(tsk);
+	u32 peer_port = tsk_peer_port(tsk);
+	struct sk_buff *probe;
+
+	if (tsk->probe_unacked) {
+		tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+		sk->sk_err = ECONNABORTED;
+		tipc_node_remove_conn(sock_net(sk), peer_node, peer_port);
+		sk->sk_state_change(sk);
+		return;
+	}
+
+	/* Prepare new probe; an allocation failure just skips this round */
+	probe = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
+				peer_node, tsk_own_node(tsk), peer_port,
+				tsk->portid, TIPC_OK);
+	if (probe)
+		__skb_queue_tail(list, probe);
+
+	tsk->probe_unacked = true;
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
+}
+
+/* Timer work for a socket in TIPC_CONNECTING state: clone the SYN kept on
+ * sk_write_queue onto @list so the caller can retransmit it.  If the
+ * socket still has a congested link, nothing is queued and the attempt is
+ * postponed by 100 ms instead.
+ */
+static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
+{
+	struct tipc_sock *tsk = tipc_sk(sk);
+
+	/* Try again later if dest link is congested */
+	if (tsk->cong_link_cnt) {
+		/* The expiry is an absolute time, so it must be based on
+		 * jiffies like every other sk_reset_timer() call here; a
+		 * bare interval would lie in the past and fire at once.
+		 */
+		sk_reset_timer(sk, &sk->sk_timer,
+			       jiffies + msecs_to_jiffies(100));
+		return;
+	}
+	/* Prepare SYN for retransmit */
+	tipc_msg_skb_clone(&sk->sk_write_queue, list);
+}
+
static void tipc_sk_timeout(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
struct tipc_sock *tsk = tipc_sk(sk);
- u32 peer_port = tsk_peer_port(tsk);
- u32 peer_node = tsk_peer_node(tsk);
- u32 own_node = tsk_own_node(tsk);
- u32 own_port = tsk->portid;
- struct net *net = sock_net(sk);
- struct sk_buff *skb = NULL;
+ u32 pnode = tsk_peer_node(tsk);
+ struct sk_buff_head list;
+ int rc = 0;
+ skb_queue_head_init(&list);
bh_lock_sock(sk);
- if (!tipc_sk_connected(sk))
- goto exit;
/* Try again later if socket is busy */
if (sock_owned_by_user(sk)) {
sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
- goto exit;
+ bh_unlock_sock(sk);
+ /* Drop the reference owned by this expiry, as the normal exit
+ * below does and as the old "goto exit" path did; returning
+ * without it leaks one socket reference per busy expiry.
+ */
+ sock_put(sk);
+ return;
}
- if (tsk->probe_unacked) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- tipc_node_remove_conn(net, peer_node, peer_port);
- sk->sk_state_change(sk);
- goto exit;
- }
- /* Send new probe */
- skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
- peer_node, own_node, peer_port, own_port,
- TIPC_OK);
- tsk->probe_unacked = true;
- sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
-exit:
+ if (sk->sk_state == TIPC_ESTABLISHED)
+ tipc_sk_check_probing_state(sk, &list);
+ else if (sk->sk_state == TIPC_CONNECTING)
+ tipc_sk_retry_connect(sk, &list);
+
bh_unlock_sock(sk);
- if (skb)
- tipc_node_xmit_skb(net, skb, peer_node, own_port);
+
+ /* Transmit outside the socket lock whatever the helpers queued */
+ if (!skb_queue_empty(&list))
+ rc = tipc_node_xmit(sock_net(sk), &list, pnode, tsk->portid);
+
+ /* SYN messages may cause link congestion */
+ if (rc == -ELINKCONG) {
+ tipc_dest_push(&tsk->cong_links, pnode, 0);
+ tsk->cong_link_cnt = 1;
+ }
sock_put(sk);
}
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 2627b5d812e9..d8956f7daac4 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -57,16 +57,12 @@
* @idr_lock: protect the connection identifier set
* @idr_in_use: amount of allocated identifier entry
* @net: network namspace instance
- * @rcvbuf_cache: memory cache of server receive buffer
+ * @awork: accept work item
* @rcv_wq: receive workqueue
* @send_wq: send workqueue
* @max_rcvbuf_size: maximum permitted receive message length
- * @tipc_conn_new: callback will be called when new connection is incoming
- * @tipc_conn_release: callback will be called before releasing the connection
- * @tipc_conn_recvmsg: callback will be called when message arrives
+ * @listener: topsrv listener socket
* @name: server name
- * @imp: message importance
- * @type: socket type
*/
struct tipc_topsrv {
struct idr conn_idr;
@@ -90,9 +86,7 @@ struct tipc_topsrv {
* @server: pointer to connected server
* @sub_list: lsit to all pertaing subscriptions
* @sub_lock: lock protecting the subscription list
- * @outqueue_lock: control access to the outqueue
* @rwork: receive work item
- * @rx_action: what to do when connection socket is active
* @outqueue: pointer to first outbound message in queue
* @outqueue_lock: control access to the outqueue
* @swork: send work item
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 523622dc74f8..b428069a1b05 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -141,7 +141,6 @@ retry:
size = sg->length;
}
- clear_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
ctx->in_tcp_sendpages = false;
ctx->sk_write_space(sk);
@@ -193,15 +192,12 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
return rc;
}
-int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx,
- int flags, long *timeo)
+int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
+ int flags)
{
struct scatterlist *sg;
u16 offset;
- if (!tls_is_partially_sent_record(ctx))
- return ctx->push_pending_record(sk, flags);
-
sg = ctx->partially_sent_record;
offset = ctx->partially_sent_offset;
@@ -209,9 +205,23 @@ int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx,
return tls_push_sg(sk, ctx, sg, offset, flags);
}
+/* Push pending TX data for @sk.  When a partially sent record exists or
+ * the software TX list is non-empty, hand over to tls_tx_records();
+ * otherwise call the context's push_pending_record() hook.  @timeo is
+ * not used by this implementation.
+ */
+int tls_push_pending_closed_record(struct sock *sk,
+				   struct tls_context *tls_ctx,
+				   int flags, long *timeo)
+{
+	struct tls_sw_context_tx *tx = tls_sw_ctx_tx(tls_ctx);
+	bool have_queued;
+
+	have_queued = tls_is_partially_sent_record(tls_ctx) ||
+		      !list_empty(&tx->tx_list);
+	if (!have_queued)
+		return tls_ctx->push_pending_record(sk, flags);
+
+	return tls_tx_records(sk, flags);
+}
+
static void tls_write_space(struct sock *sk)
{
struct tls_context *ctx = tls_get_ctx(sk);
+ struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
/* If in_tcp_sendpages call lower protocol write space handler
* to ensure we wake up any waiting operations there. For example
@@ -222,20 +232,11 @@ static void tls_write_space(struct sock *sk)
return;
}
- if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) {
- gfp_t sk_allocation = sk->sk_allocation;
- int rc;
- long timeo = 0;
-
- sk->sk_allocation = GFP_ATOMIC;
- rc = tls_push_pending_closed_record(sk, ctx,
- MSG_DONTWAIT |
- MSG_NOSIGNAL,
- &timeo);
- sk->sk_allocation = sk_allocation;
-
- if (rc < 0)
- return;
+ /* Schedule the transmission if tx list is ready */
+ if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
+ /* Schedule the transmission */
+ if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
+ schedule_delayed_work(&tx_ctx->tx_work.work, 0);
}
ctx->sk_write_space(sk);
@@ -270,19 +271,6 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
tls_handle_open_record(sk, 0);
- if (ctx->partially_sent_record) {
- struct scatterlist *sg = ctx->partially_sent_record;
-
- while (1) {
- put_page(sg_page(sg));
- sk_mem_uncharge(sk, sg->length);
-
- if (sg_is_last(sg))
- break;
- sg++;
- }
- }
-
/* We need these for tls_sw_fallback handling of other packets */
if (ctx->tx_conf == TLS_SW) {
kfree(ctx->tx.rec_seq);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index b9c6ecfbcfea..aa9fdce272b6 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -43,12 +43,133 @@
#define MAX_IV_SIZE TLS_CIPHER_AES_GCM_128_IV_SIZE
+static int __skb_nsg(struct sk_buff *skb, int offset, int len,
+ unsigned int recursion_level)
+{ /* Count entries needed to cover len bytes from offset: linear head, then page frags, then frag_list skbs (recursively) */
+ int start = skb_headlen(skb); /* byte position where the part currently being examined begins */
+ int i, chunk = start - offset; /* bytes of the requested range that fall inside the linear head */
+ struct sk_buff *frag_iter;
+ int elt = 0; /* running count of entries; this is the return value */
+
+ if (unlikely(recursion_level >= 24)) /* bound the frag_list nesting depth */
+ return -EMSGSIZE;
+
+ if (chunk > 0) { /* range starts inside the linear head: one entry */
+ if (chunk > len)
+ chunk = len;
+ elt++;
+ len -= chunk;
+ if (len == 0)
+ return elt;
+ offset += chunk;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { /* one entry per page fragment that overlaps the range */
+ int end;
+
+ WARN_ON(start > offset + len);
+
+ end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
+ chunk = end - offset;
+ if (chunk > 0) {
+ if (chunk > len)
+ chunk = len;
+ elt++;
+ len -= chunk;
+ if (len == 0)
+ return elt;
+ offset += chunk;
+ }
+ start = end;
+ }
+
+ if (unlikely(skb_has_frag_list(skb))) {
+ skb_walk_frags(skb, frag_iter) { /* each overlapping child skb is counted by a recursive call */
+ int end, ret;
+
+ WARN_ON(start > offset + len);
+
+ end = start + frag_iter->len;
+ chunk = end - offset;
+ if (chunk > 0) {
+ if (chunk > len)
+ chunk = len;
+ ret = __skb_nsg(frag_iter, offset - start, chunk, /* offset is rebased to the child skb */
+ recursion_level + 1);
+ if (unlikely(ret < 0)) /* propagate -EMSGSIZE from deeper levels */
+ return ret;
+ elt += ret;
+ len -= chunk;
+ if (len == 0)
+ return elt;
+ offset += chunk;
+ }
+ start = end;
+ }
+ }
+ BUG_ON(len); /* every requested byte must have been accounted for by now */
+ return elt;
+}
+
+/* Return the number of scatterlist elements required to map @len bytes of
+ * @skb starting at @offset, or -EMSGSIZE if the recursion depth is exceeded.
+ */
+static int skb_nsg(struct sk_buff *skb, int offset, int len)
+{
+ return __skb_nsg(skb, offset, len, 0); /* top-level call starts at recursion level 0 */
+}
+
+static void tls_decrypt_done(struct crypto_async_request *req, int err)
+{ /* Async decrypt completion: record any error, release the skb, destination pages and request, then signal a waiter if nothing is pending */
+ struct aead_request *aead_req = (struct aead_request *)req;
+ struct scatterlist *sgout = aead_req->dst;
+ struct tls_sw_context_rx *ctx;
+ struct tls_context *tls_ctx;
+ struct scatterlist *sg;
+ struct sk_buff *skb;
+ unsigned int pages;
+ int pending;
+
+ skb = (struct sk_buff *)req->data; /* the skb that tls_do_decryption() registered as callback data */
+ tls_ctx = tls_get_ctx(skb->sk);
+ ctx = tls_sw_ctx_rx(tls_ctx);
+ pending = atomic_dec_return(&ctx->decrypt_pending); /* number of requests still in flight after this one */
+
+ /* Propagate if there was an err */
+ if (err) {
+ ctx->async_wait.err = err;
+ tls_err_abort(skb->sk, err);
+ }
+
+ /* After using skb->sk to propagate sk through crypto async callback
+ * we need to NULL it again.
+ */
+ skb->sk = NULL;
+
+ /* Release the skb, pages and memory allocated for crypto req */
+ kfree_skb(skb);
+
+ /* Skip the first S/G entry as it points to AAD */
+ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { /* no real element count is known: the NULL check below ends the walk */
+ if (!sg)
+ break;
+ put_page(sg_page(sg));
+ }
+
+ kfree(aead_req);
+
+ if (!pending && READ_ONCE(ctx->async_notify)) /* complete only when this was the last request and async_notify is set */
+ complete(&ctx->async_wait.completion);
+}
+
static int tls_do_decryption(struct sock *sk,
+ struct sk_buff *skb,
struct scatterlist *sgin,
struct scatterlist *sgout,
char *iv_recv,
size_t data_len,
- struct aead_request *aead_req)
+ struct aead_request *aead_req,
+ bool async)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -59,10 +180,36 @@ static int tls_do_decryption(struct sock *sk,
aead_request_set_crypt(aead_req, sgin, sgout,
data_len + tls_ctx->rx.tag_size,
(u8 *)iv_recv);
- aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &ctx->async_wait);
- ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait);
+ if (async) {
+ /* Using skb->sk to push sk through to crypto async callback
+ * handler. This allows propagating errors up to the socket
+ * if needed. It _must_ be cleared in the async handler
+ * before kfree_skb is called. We _know_ skb->sk is NULL
+ * because it is a clone from strparser.
+ */
+ skb->sk = sk;
+ aead_request_set_callback(aead_req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG,
+ tls_decrypt_done, skb);
+ atomic_inc(&ctx->decrypt_pending);
+ } else {
+ aead_request_set_callback(aead_req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &ctx->async_wait);
+ }
+
+ ret = crypto_aead_decrypt(aead_req);
+ if (ret == -EINPROGRESS) {
+ if (async)
+ return ret;
+
+ ret = crypto_wait_req(ret, &ctx->async_wait);
+ }
+
+ if (async)
+ atomic_dec(&ctx->decrypt_pending);
+
return ret;
}
@@ -99,18 +246,19 @@ static void trim_both_sgl(struct sock *sk, int target_size)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+ struct tls_rec *rec = ctx->open_rec;
- trim_sg(sk, ctx->sg_plaintext_data,
- &ctx->sg_plaintext_num_elem,
- &ctx->sg_plaintext_size,
+ trim_sg(sk, &rec->sg_plaintext_data[1],
+ &rec->sg_plaintext_num_elem,
+ &rec->sg_plaintext_size,
target_size);
if (target_size > 0)
target_size += tls_ctx->tx.overhead_size;
- trim_sg(sk, ctx->sg_encrypted_data,
- &ctx->sg_encrypted_num_elem,
- &ctx->sg_encrypted_size,
+ trim_sg(sk, &rec->sg_encrypted_data[1],
+ &rec->sg_encrypted_num_elem,
+ &rec->sg_encrypted_size,
target_size);
}
@@ -118,33 +266,87 @@ static int alloc_encrypted_sg(struct sock *sk, int len)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+ struct tls_rec *rec = ctx->open_rec;
int rc = 0;
rc = sk_alloc_sg(sk, len,
- ctx->sg_encrypted_data, 0,
- &ctx->sg_encrypted_num_elem,
- &ctx->sg_encrypted_size, 0);
+ &rec->sg_encrypted_data[1], 0,
+ &rec->sg_encrypted_num_elem,
+ &rec->sg_encrypted_size, 0);
if (rc == -ENOSPC)
- ctx->sg_encrypted_num_elem = ARRAY_SIZE(ctx->sg_encrypted_data);
+ rec->sg_encrypted_num_elem =
+ ARRAY_SIZE(rec->sg_encrypted_data) - 1;
return rc;
}
-static int alloc_plaintext_sg(struct sock *sk, int len)
+static int move_to_plaintext_sg(struct sock *sk, int required_size)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
- int rc = 0;
+ struct tls_rec *rec = ctx->open_rec;
+ struct scatterlist *plain_sg = &rec->sg_plaintext_data[1];
+ struct scatterlist *enc_sg = &rec->sg_encrypted_data[1];
+ int enc_sg_idx = 0;
+ int skip, len;
+
+ if (rec->sg_plaintext_num_elem == MAX_SKB_FRAGS)
+ return -ENOSPC;
+
+ /* We add page references worth len bytes from enc_sg at the
+ * end of plain_sg. It is guaranteed that sg_encrypted_data
+ * has enough required room (ensured by caller).
+ */
+ len = required_size - rec->sg_plaintext_size;
- rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0,
- &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
- tls_ctx->pending_open_record_frags);
+ /* Skip initial bytes in sg_encrypted_data to be able
+ * to use the same offset for both plain and encrypted data.
+ */
+ skip = tls_ctx->tx.prepend_size + rec->sg_plaintext_size;
- if (rc == -ENOSPC)
- ctx->sg_plaintext_num_elem = ARRAY_SIZE(ctx->sg_plaintext_data);
+ while (enc_sg_idx < rec->sg_encrypted_num_elem) {
+ if (enc_sg[enc_sg_idx].length > skip)
+ break;
- return rc;
+ skip -= enc_sg[enc_sg_idx].length;
+ enc_sg_idx++;
+ }
+
+ /* Unmark the end of plain_sg */
+ sg_unmark_end(plain_sg + rec->sg_plaintext_num_elem - 1);
+
+ while (len) {
+ struct page *page = sg_page(&enc_sg[enc_sg_idx]);
+ int bytes = enc_sg[enc_sg_idx].length - skip;
+ int offset = enc_sg[enc_sg_idx].offset + skip;
+
+ if (bytes > len)
+ bytes = len;
+ else
+ enc_sg_idx++;
+
+ /* Skipping is required only one time */
+ skip = 0;
+
+ /* Increment page reference */
+ get_page(page);
+
+ sg_set_page(&plain_sg[rec->sg_plaintext_num_elem], page,
+ bytes, offset);
+
+ sk_mem_charge(sk, bytes);
+
+ len -= bytes;
+ rec->sg_plaintext_size += bytes;
+
+ rec->sg_plaintext_num_elem++;
+
+ if (rec->sg_plaintext_num_elem == MAX_SKB_FRAGS)
+ return -ENOSPC;
+ }
+
+ return 0;
}
static void free_sg(struct sock *sk, struct scatterlist *sg,
@@ -160,41 +362,192 @@ static void free_sg(struct sock *sk, struct scatterlist *sg,
*sg_size = 0;
}
-static void tls_free_both_sg(struct sock *sk)
+static void tls_free_open_rec(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+ struct tls_rec *rec = ctx->open_rec;
- free_sg(sk, ctx->sg_encrypted_data, &ctx->sg_encrypted_num_elem,
- &ctx->sg_encrypted_size);
+ /* Return if there is no open record */
+ if (!rec)
+ return;
- free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem,
- &ctx->sg_plaintext_size);
+ free_sg(sk, &rec->sg_encrypted_data[1],
+ &rec->sg_encrypted_num_elem,
+ &rec->sg_encrypted_size);
+
+ free_sg(sk, &rec->sg_plaintext_data[1],
+ &rec->sg_plaintext_num_elem,
+ &rec->sg_plaintext_size);
+
+ kfree(rec);
}
-static int tls_do_encryption(struct tls_context *tls_ctx,
+int tls_tx_records(struct sock *sk, int flags)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+ struct tls_rec *rec, *tmp;
+ int tx_flags, rc = 0;
+
+ if (tls_is_partially_sent_record(tls_ctx)) {
+ rec = list_first_entry(&ctx->tx_list,
+ struct tls_rec, list);
+
+ if (flags == -1)
+ tx_flags = rec->tx_flags;
+ else
+ tx_flags = flags;
+
+ rc = tls_push_partial_record(sk, tls_ctx, tx_flags);
+ if (rc)
+ goto tx_err;
+
+ /* Full record has been transmitted.
+ * Remove the head of tx_list
+ */
+ list_del(&rec->list);
+ free_sg(sk, &rec->sg_plaintext_data[1],
+ &rec->sg_plaintext_num_elem, &rec->sg_plaintext_size);
+
+ kfree(rec);
+ }
+
+ /* Tx all ready records */
+ list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) {
+ if (READ_ONCE(rec->tx_ready)) {
+ if (flags == -1)
+ tx_flags = rec->tx_flags;
+ else
+ tx_flags = flags;
+
+ rc = tls_push_sg(sk, tls_ctx,
+ &rec->sg_encrypted_data[1],
+ 0, tx_flags);
+ if (rc)
+ goto tx_err;
+
+ list_del(&rec->list);
+ free_sg(sk, &rec->sg_plaintext_data[1],
+ &rec->sg_plaintext_num_elem,
+ &rec->sg_plaintext_size);
+
+ kfree(rec);
+ } else {
+ break;
+ }
+ }
+
+tx_err:
+ if (rc < 0 && rc != -EAGAIN)
+ tls_err_abort(sk, EBADMSG);
+
+ return rc;
+}
+
+static void tls_encrypt_done(struct crypto_async_request *req, int err)
+{
+ struct aead_request *aead_req = (struct aead_request *)req;
+ struct sock *sk = req->data;
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+ struct tls_rec *rec;
+ bool ready = false;
+ int pending;
+
+ rec = container_of(aead_req, struct tls_rec, aead_req);
+
+ rec->sg_encrypted_data[1].offset -= tls_ctx->tx.prepend_size;
+ rec->sg_encrypted_data[1].length += tls_ctx->tx.prepend_size;
+
+
+ /* Check if error is previously set on socket */
+ if (err || sk->sk_err) {
+ rec = NULL;
+
+ /* If err is already set on socket, return the same code */
+ if (sk->sk_err) {
+ ctx->async_wait.err = sk->sk_err;
+ } else {
+ ctx->async_wait.err = err;
+ tls_err_abort(sk, err);
+ }
+ }
+
+ if (rec) {
+ struct tls_rec *first_rec;
+
+ /* Mark the record as ready for transmission */
+ smp_store_mb(rec->tx_ready, true);
+
+ /* If received record is at head of tx_list, schedule tx */
+ first_rec = list_first_entry(&ctx->tx_list,
+ struct tls_rec, list);
+ if (rec == first_rec)
+ ready = true;
+ }
+
+ pending = atomic_dec_return(&ctx->encrypt_pending);
+
+ if (!pending && READ_ONCE(ctx->async_notify))
+ complete(&ctx->async_wait.completion);
+
+ if (!ready)
+ return;
+
+ /* Schedule the transmission */
+ if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+ schedule_delayed_work(&ctx->tx_work.work, 2);
+}
+
+static int tls_do_encryption(struct sock *sk,
+ struct tls_context *tls_ctx,
struct tls_sw_context_tx *ctx,
struct aead_request *aead_req,
size_t data_len)
{
+ struct tls_rec *rec = ctx->open_rec;
+ struct scatterlist *plain_sg = rec->sg_plaintext_data;
+ struct scatterlist *enc_sg = rec->sg_encrypted_data;
int rc;
- ctx->sg_encrypted_data[0].offset += tls_ctx->tx.prepend_size;
- ctx->sg_encrypted_data[0].length -= tls_ctx->tx.prepend_size;
+ /* Skip the first index as it contains AAD data */
+ rec->sg_encrypted_data[1].offset += tls_ctx->tx.prepend_size;
+ rec->sg_encrypted_data[1].length -= tls_ctx->tx.prepend_size;
+
+ /* If it is inplace crypto, then pass same SG list as both src, dst */
+ if (rec->inplace_crypto)
+ plain_sg = enc_sg;
aead_request_set_tfm(aead_req, ctx->aead_send);
aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
- aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out,
+ aead_request_set_crypt(aead_req, plain_sg, enc_sg,
data_len, tls_ctx->tx.iv);
aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &ctx->async_wait);
+ tls_encrypt_done, sk);
- rc = crypto_wait_req(crypto_aead_encrypt(aead_req), &ctx->async_wait);
+ /* Add the record in tx_list */
+ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list);
+ atomic_inc(&ctx->encrypt_pending);
- ctx->sg_encrypted_data[0].offset -= tls_ctx->tx.prepend_size;
- ctx->sg_encrypted_data[0].length += tls_ctx->tx.prepend_size;
+ rc = crypto_aead_encrypt(aead_req);
+ if (!rc || rc != -EINPROGRESS) {
+ atomic_dec(&ctx->encrypt_pending);
+ rec->sg_encrypted_data[1].offset -= tls_ctx->tx.prepend_size;
+ rec->sg_encrypted_data[1].length += tls_ctx->tx.prepend_size;
+ }
+
+ if (!rc) {
+ WRITE_ONCE(rec->tx_ready, true);
+ } else if (rc != -EINPROGRESS) {
+ list_del(&rec->list);
+ return rc;
+ }
+ /* Unhook the record from the context unless encryption failed */
+ ctx->open_rec = NULL;
+ tls_advance_record_sn(sk, &tls_ctx->tx);
return rc;
}
@@ -203,53 +556,40 @@ static int tls_push_record(struct sock *sk, int flags,
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+ struct tls_rec *rec = ctx->open_rec;
struct aead_request *req;
int rc;
- req = aead_request_alloc(ctx->aead_send, sk->sk_allocation);
- if (!req)
- return -ENOMEM;
+ if (!rec)
+ return 0;
+
+ rec->tx_flags = flags;
+ req = &rec->aead_req;
- sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
- sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
+ sg_mark_end(rec->sg_plaintext_data + rec->sg_plaintext_num_elem);
+ sg_mark_end(rec->sg_encrypted_data + rec->sg_encrypted_num_elem);
- tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size,
+ tls_make_aad(rec->aad_space, rec->sg_plaintext_size,
tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
record_type);
tls_fill_prepend(tls_ctx,
- page_address(sg_page(&ctx->sg_encrypted_data[0])) +
- ctx->sg_encrypted_data[0].offset,
- ctx->sg_plaintext_size, record_type);
+ page_address(sg_page(&rec->sg_encrypted_data[1])) +
+ rec->sg_encrypted_data[1].offset,
+ rec->sg_plaintext_size, record_type);
tls_ctx->pending_open_record_frags = 0;
- set_bit(TLS_PENDING_CLOSED_RECORD, &tls_ctx->flags);
- rc = tls_do_encryption(tls_ctx, ctx, req, ctx->sg_plaintext_size);
- if (rc < 0) {
- /* If we are called from write_space and
- * we fail, we need to set this SOCK_NOSPACE
- * to trigger another write_space in the future.
- */
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- goto out_req;
- }
+ rc = tls_do_encryption(sk, tls_ctx, ctx, req, rec->sg_plaintext_size);
+ if (rc == -EINPROGRESS)
+ return -EINPROGRESS;
- free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem,
- &ctx->sg_plaintext_size);
-
- ctx->sg_encrypted_num_elem = 0;
- ctx->sg_encrypted_size = 0;
-
- /* Only pass through MSG_DONTWAIT and MSG_NOSIGNAL flags */
- rc = tls_push_sg(sk, tls_ctx, ctx->sg_encrypted_data, 0, flags);
- if (rc < 0 && rc != -EAGAIN)
+ if (rc < 0) {
tls_err_abort(sk, EBADMSG);
+ return rc;
+ }
- tls_advance_record_sn(sk, &tls_ctx->tx);
-out_req:
- aead_request_free(req);
- return rc;
+ return tls_tx_records(sk, flags);
}
static int tls_sw_push_pending_record(struct sock *sk, int flags)
@@ -326,11 +666,12 @@ static int memcopy_from_iter(struct sock *sk, struct iov_iter *from,
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
- struct scatterlist *sg = ctx->sg_plaintext_data;
+ struct tls_rec *rec = ctx->open_rec;
+ struct scatterlist *sg = &rec->sg_plaintext_data[1];
int copy, i, rc = 0;
for (i = tls_ctx->pending_open_record_frags;
- i < ctx->sg_plaintext_num_elem; ++i) {
+ i < rec->sg_plaintext_num_elem; ++i) {
copy = sg[i].length;
if (copy_from_iter(
page_address(sg_page(&sg[i])) + sg[i].offset,
@@ -350,33 +691,79 @@ out:
return rc;
}
-int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+static struct tls_rec *get_rec(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
- int ret = 0;
- int required_size;
+ struct tls_rec *rec;
+ int mem_size;
+
+ /* Return if we already have an open record */
+ if (ctx->open_rec)
+ return ctx->open_rec;
+
+ mem_size = sizeof(struct tls_rec) + crypto_aead_reqsize(ctx->aead_send);
+
+ rec = kzalloc(mem_size, sk->sk_allocation);
+ if (!rec)
+ return NULL;
+
+ sg_init_table(&rec->sg_plaintext_data[0],
+ ARRAY_SIZE(rec->sg_plaintext_data));
+ sg_init_table(&rec->sg_encrypted_data[0],
+ ARRAY_SIZE(rec->sg_encrypted_data));
+
+ sg_set_buf(&rec->sg_plaintext_data[0], rec->aad_space,
+ sizeof(rec->aad_space));
+ sg_set_buf(&rec->sg_encrypted_data[0], rec->aad_space,
+ sizeof(rec->aad_space));
+
+ ctx->open_rec = rec;
+ rec->inplace_crypto = 1;
+
+ return rec;
+}
+
+int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+ struct crypto_tfm *tfm = crypto_aead_tfm(ctx->aead_send);
+ bool async_capable = tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+ unsigned char record_type = TLS_RECORD_TYPE_DATA;
+ bool is_kvec = msg->msg_iter.type & ITER_KVEC;
bool eor = !(msg->msg_flags & MSG_MORE);
size_t try_to_copy, copied = 0;
- unsigned char record_type = TLS_RECORD_TYPE_DATA;
- int record_room;
+ struct tls_rec *rec;
+ int required_size;
+ int num_async = 0;
bool full_record;
+ int record_room;
+ int num_zc = 0;
int orig_size;
- bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+ int ret = 0;
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
return -ENOTSUPP;
lock_sock(sk);
- if (tls_complete_pending_work(sk, tls_ctx, msg->msg_flags, &timeo))
- goto send_end;
+ /* If a write is pending on the socket, wait on the pending writer */
+ if (unlikely(sk->sk_write_pending)) {
+ ret = wait_on_pending_writer(sk, &timeo);
+ if (unlikely(ret))
+ goto send_end;
+ }
if (unlikely(msg->msg_controllen)) {
ret = tls_proccess_cmsg(sk, msg, &record_type);
- if (ret)
- goto send_end;
+ if (ret) {
+ if (ret == -EINPROGRESS)
+ num_async++;
+ else if (ret != -EAGAIN)
+ goto send_end;
+ }
}
while (msg_data_left(msg)) {
@@ -385,20 +772,27 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
goto send_end;
}
- orig_size = ctx->sg_plaintext_size;
+ rec = get_rec(sk);
+ if (!rec) {
+ ret = -ENOMEM;
+ goto send_end;
+ }
+
+ orig_size = rec->sg_plaintext_size;
full_record = false;
try_to_copy = msg_data_left(msg);
- record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size;
+ record_room = TLS_MAX_PAYLOAD_SIZE - rec->sg_plaintext_size;
if (try_to_copy >= record_room) {
try_to_copy = record_room;
full_record = true;
}
- required_size = ctx->sg_plaintext_size + try_to_copy +
+ required_size = rec->sg_plaintext_size + try_to_copy +
tls_ctx->tx.overhead_size;
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
+
alloc_encrypted:
ret = alloc_encrypted_sg(sk, required_size);
if (ret) {
@@ -409,50 +803,58 @@ alloc_encrypted:
* actually allocated. The difference is due
* to max sg elements limit
*/
- try_to_copy -= required_size - ctx->sg_encrypted_size;
+ try_to_copy -= required_size - rec->sg_encrypted_size;
full_record = true;
}
- if (!is_kvec && (full_record || eor)) {
+
+ if (!is_kvec && (full_record || eor) && !async_capable) {
ret = zerocopy_from_iter(sk, &msg->msg_iter,
- try_to_copy, &ctx->sg_plaintext_num_elem,
- &ctx->sg_plaintext_size,
- ctx->sg_plaintext_data,
- ARRAY_SIZE(ctx->sg_plaintext_data),
+ try_to_copy, &rec->sg_plaintext_num_elem,
+ &rec->sg_plaintext_size,
+ &rec->sg_plaintext_data[1],
+ ARRAY_SIZE(rec->sg_plaintext_data) - 1,
true);
if (ret)
goto fallback_to_reg_send;
+ rec->inplace_crypto = 0;
+
+ num_zc++;
copied += try_to_copy;
ret = tls_push_record(sk, msg->msg_flags, record_type);
- if (ret)
- goto send_end;
+ if (ret) {
+ if (ret == -EINPROGRESS)
+ num_async++;
+ else if (ret != -EAGAIN)
+ goto send_end;
+ }
continue;
fallback_to_reg_send:
- trim_sg(sk, ctx->sg_plaintext_data,
- &ctx->sg_plaintext_num_elem,
- &ctx->sg_plaintext_size,
+ trim_sg(sk, &rec->sg_plaintext_data[1],
+ &rec->sg_plaintext_num_elem,
+ &rec->sg_plaintext_size,
orig_size);
}
- required_size = ctx->sg_plaintext_size + try_to_copy;
-alloc_plaintext:
- ret = alloc_plaintext_sg(sk, required_size);
+ required_size = rec->sg_plaintext_size + try_to_copy;
+
+ ret = move_to_plaintext_sg(sk, required_size);
if (ret) {
if (ret != -ENOSPC)
- goto wait_for_memory;
+ goto send_end;
/* Adjust try_to_copy according to the amount that was
* actually allocated. The difference is due
* to max sg elements limit
*/
- try_to_copy -= required_size - ctx->sg_plaintext_size;
+ try_to_copy -= required_size - rec->sg_plaintext_size;
full_record = true;
- trim_sg(sk, ctx->sg_encrypted_data,
- &ctx->sg_encrypted_num_elem,
- &ctx->sg_encrypted_size,
- ctx->sg_plaintext_size +
+ trim_sg(sk, &rec->sg_encrypted_data[1],
+ &rec->sg_encrypted_num_elem,
+ &rec->sg_encrypted_size,
+ rec->sg_plaintext_size +
tls_ctx->tx.overhead_size);
}
@@ -462,13 +864,12 @@ alloc_plaintext:
copied += try_to_copy;
if (full_record || eor) {
-push_record:
ret = tls_push_record(sk, msg->msg_flags, record_type);
if (ret) {
- if (ret == -ENOMEM)
- goto wait_for_memory;
-
- goto send_end;
+ if (ret == -EINPROGRESS)
+ num_async++;
+ else if (ret != -EAGAIN)
+ goto send_end;
}
}
@@ -484,13 +885,33 @@ trim_sgl:
goto send_end;
}
- if (tls_is_pending_closed_record(tls_ctx))
- goto push_record;
-
- if (ctx->sg_encrypted_size < required_size)
+ if (rec->sg_encrypted_size < required_size)
goto alloc_encrypted;
+ }
+
+ if (!num_async) {
+ goto send_end;
+ } else if (num_zc) {
+ /* Wait for pending encryptions to get completed */
+ smp_store_mb(ctx->async_notify, true);
+
+ if (atomic_read(&ctx->encrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ else
+ reinit_completion(&ctx->async_wait.completion);
+
+ WRITE_ONCE(ctx->async_notify, false);
- goto alloc_plaintext;
+ if (ctx->async_wait.err) {
+ ret = ctx->async_wait.err;
+ copied = 0;
+ }
+ }
+
+ /* Transmit if any encryptions have completed */
+ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
+ cancel_delayed_work(&ctx->tx_work.work);
+ tls_tx_records(sk, msg->msg_flags);
}
send_end:
@@ -503,16 +924,18 @@ send_end:
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
+ long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
- int ret = 0;
- long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
- bool eor;
- size_t orig_size = size;
unsigned char record_type = TLS_RECORD_TYPE_DATA;
+ size_t orig_size = size;
struct scatterlist *sg;
+ struct tls_rec *rec;
+ int num_async = 0;
bool full_record;
int record_room;
+ int ret = 0;
+ bool eor;
if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
MSG_SENDPAGE_NOTLAST))
@@ -525,8 +948,12 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- if (tls_complete_pending_work(sk, tls_ctx, flags, &timeo))
- goto sendpage_end;
+ /* If a write is pending on the socket, wait on the pending writer */
+ if (unlikely(sk->sk_write_pending)) {
+ ret = wait_on_pending_writer(sk, &timeo);
+ if (unlikely(ret))
+ goto sendpage_end;
+ }
/* Call the sk_stream functions to manage the sndbuf mem. */
while (size > 0) {
@@ -537,14 +964,20 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
goto sendpage_end;
}
+ rec = get_rec(sk);
+ if (!rec) {
+ ret = -ENOMEM;
+ goto sendpage_end;
+ }
+
full_record = false;
- record_room = TLS_MAX_PAYLOAD_SIZE - ctx->sg_plaintext_size;
+ record_room = TLS_MAX_PAYLOAD_SIZE - rec->sg_plaintext_size;
copy = size;
if (copy >= record_room) {
copy = record_room;
full_record = true;
}
- required_size = ctx->sg_plaintext_size + copy +
+ required_size = rec->sg_plaintext_size + copy +
tls_ctx->tx.overhead_size;
if (!sk_stream_memory_free(sk))
@@ -559,33 +992,33 @@ alloc_payload:
* actually allocated. The difference is due
* to max sg elements limit
*/
- copy -= required_size - ctx->sg_plaintext_size;
+ copy -= required_size - rec->sg_plaintext_size;
full_record = true;
}
get_page(page);
- sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
+ sg = &rec->sg_plaintext_data[1] + rec->sg_plaintext_num_elem;
sg_set_page(sg, page, copy, offset);
sg_unmark_end(sg);
- ctx->sg_plaintext_num_elem++;
+ rec->sg_plaintext_num_elem++;
sk_mem_charge(sk, copy);
offset += copy;
size -= copy;
- ctx->sg_plaintext_size += copy;
- tls_ctx->pending_open_record_frags = ctx->sg_plaintext_num_elem;
+ rec->sg_plaintext_size += copy;
+ tls_ctx->pending_open_record_frags = rec->sg_plaintext_num_elem;
if (full_record || eor ||
- ctx->sg_plaintext_num_elem ==
- ARRAY_SIZE(ctx->sg_plaintext_data)) {
-push_record:
+ rec->sg_plaintext_num_elem ==
+ ARRAY_SIZE(rec->sg_plaintext_data) - 1) {
+ rec->inplace_crypto = 0;
ret = tls_push_record(sk, flags, record_type);
if (ret) {
- if (ret == -ENOMEM)
- goto wait_for_memory;
-
- goto sendpage_end;
+ if (ret == -EINPROGRESS)
+ num_async++;
+ else if (ret != -EAGAIN)
+ goto sendpage_end;
}
}
continue;
@@ -594,16 +1027,20 @@ wait_for_sndbuf:
wait_for_memory:
ret = sk_stream_wait_memory(sk, &timeo);
if (ret) {
- trim_both_sgl(sk, ctx->sg_plaintext_size);
+ trim_both_sgl(sk, rec->sg_plaintext_size);
goto sendpage_end;
}
- if (tls_is_pending_closed_record(tls_ctx))
- goto push_record;
-
goto alloc_payload;
}
+ if (num_async) {
+ /* Transmit if any encryptions have completed */
+ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
+ cancel_delayed_work(&ctx->tx_work.work);
+ tls_tx_records(sk, flags);
+ }
+ }
sendpage_end:
if (orig_size > size)
ret = orig_size - size;
@@ -684,12 +1121,14 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1;
else
n_sgout = sg_nents(out_sg);
+ n_sgin = skb_nsg(skb, rxm->offset + tls_ctx->rx.prepend_size,
+ rxm->full_len - tls_ctx->rx.prepend_size);
} else {
n_sgout = 0;
*zc = false;
+ n_sgin = skb_cow_data(skb, 0, &unused);
}
- n_sgin = skb_cow_data(skb, 0, &unused);
if (n_sgin < 1)
return -EBADMSG;
@@ -769,7 +1208,10 @@ fallback_to_reg_recv:
}
/* Prepare and submit AEAD request */
- err = tls_do_decryption(sk, sgin, sgout, iv, data_len, aead_req);
+ err = tls_do_decryption(sk, skb, sgin, sgout, iv,
+ data_len, aead_req, *zc);
+ if (err == -EINPROGRESS)
+ return err;
/* Release the pages in case iov was mapped to pages */
for (; pages > 0; pages--)
@@ -794,8 +1236,12 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
#endif
if (!ctx->decrypted) {
err = decrypt_internal(sk, skb, dest, NULL, chunk, zc);
- if (err < 0)
+ if (err < 0) {
+ if (err == -EINPROGRESS)
+ tls_advance_record_sn(sk, &tls_ctx->rx);
+
return err;
+ }
} else {
*zc = false;
}
@@ -823,18 +1269,20 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- struct strp_msg *rxm = strp_msg(skb);
- if (len < rxm->full_len) {
- rxm->offset += len;
- rxm->full_len -= len;
+ if (skb) {
+ struct strp_msg *rxm = strp_msg(skb);
- return false;
+ if (len < rxm->full_len) {
+ rxm->offset += len;
+ rxm->full_len -= len;
+ return false;
+ }
+ kfree_skb(skb);
}
/* Finished with message */
ctx->recv_pkt = NULL;
- kfree_skb(skb);
__strp_unpause(&ctx->strp);
return true;
@@ -857,6 +1305,7 @@ int tls_sw_recvmsg(struct sock *sk,
int target, err = 0;
long timeo;
bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+ int num_async = 0;
flags |= nonblock;
@@ -869,6 +1318,7 @@ int tls_sw_recvmsg(struct sock *sk,
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
bool zc = false;
+ bool async = false;
int chunk = 0;
skb = tls_wait_data(sk, flags, timeo, &err);
@@ -876,6 +1326,7 @@ int tls_sw_recvmsg(struct sock *sk,
goto recv_end;
rxm = strp_msg(skb);
+
if (!cmsg) {
int cerr;
@@ -902,26 +1353,39 @@ int tls_sw_recvmsg(struct sock *sk,
err = decrypt_skb_update(sk, skb, &msg->msg_iter,
&chunk, &zc);
- if (err < 0) {
+ if (err < 0 && err != -EINPROGRESS) {
tls_err_abort(sk, EBADMSG);
goto recv_end;
}
+
+ if (err == -EINPROGRESS) {
+ async = true;
+ num_async++;
+ goto pick_next_record;
+ }
+
ctx->decrypted = true;
}
if (!zc) {
chunk = min_t(unsigned int, rxm->full_len, len);
+
err = skb_copy_datagram_msg(skb, rxm->offset, msg,
chunk);
if (err < 0)
goto recv_end;
}
+pick_next_record:
copied += chunk;
len -= chunk;
if (likely(!(flags & MSG_PEEK))) {
u8 control = ctx->control;
+ /* For async, drop current skb reference */
+ if (async)
+ skb = NULL;
+
if (tls_sw_advance_skb(sk, skb, chunk)) {
/* Return full control message to
* userspace before trying to parse
@@ -930,6 +1394,8 @@ int tls_sw_recvmsg(struct sock *sk,
msg->msg_flags |= MSG_EOR;
if (control != TLS_RECORD_TYPE_DATA)
goto recv_end;
+ } else {
+ break;
}
} else {
/* MSG_PEEK right now cannot look beyond current skb
@@ -946,6 +1412,22 @@ int tls_sw_recvmsg(struct sock *sk,
} while (len);
recv_end:
+ if (num_async) {
+ /* Wait for all previously submitted records to be decrypted */
+ smp_store_mb(ctx->async_notify, true);
+ if (atomic_read(&ctx->decrypt_pending)) {
+ err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ if (err) {
+ /* one of the async decrypts failed */
+ tls_err_abort(sk, err);
+ copied = 0;
+ }
+ } else {
+ reinit_completion(&ctx->async_wait.completion);
+ }
+ WRITE_ONCE(ctx->async_notify, false);
+ }
+
release_sock(sk);
return copied ? : err;
}
@@ -1106,9 +1588,61 @@ void tls_sw_free_resources_tx(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+ struct tls_rec *rec, *tmp;
+
+ /* Wait for any pending async encryptions to complete */
+ smp_store_mb(ctx->async_notify, true);
+ if (atomic_read(&ctx->encrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+
+ cancel_delayed_work_sync(&ctx->tx_work.work);
+
+ /* Tx whatever records we can transmit and abandon the rest */
+ tls_tx_records(sk, -1);
+
+ /* Free up un-sent records in tx_list. First, free
+ * the partially sent record if any at head of tx_list.
+ */
+ if (tls_ctx->partially_sent_record) {
+ struct scatterlist *sg = tls_ctx->partially_sent_record;
+
+ while (1) {
+ put_page(sg_page(sg));
+ sk_mem_uncharge(sk, sg->length);
+
+ if (sg_is_last(sg))
+ break;
+ sg++;
+ }
+
+ tls_ctx->partially_sent_record = NULL;
+
+ rec = list_first_entry(&ctx->tx_list,
+ struct tls_rec, list);
+
+ free_sg(sk, &rec->sg_plaintext_data[1],
+ &rec->sg_plaintext_num_elem,
+ &rec->sg_plaintext_size);
+
+ list_del(&rec->list);
+ kfree(rec);
+ }
+
+ list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) {
+ free_sg(sk, &rec->sg_encrypted_data[1],
+ &rec->sg_encrypted_num_elem,
+ &rec->sg_encrypted_size);
+
+ free_sg(sk, &rec->sg_plaintext_data[1],
+ &rec->sg_plaintext_num_elem,
+ &rec->sg_plaintext_size);
+
+ list_del(&rec->list);
+ kfree(rec);
+ }
crypto_free_aead(ctx->aead_send);
- tls_free_both_sg(sk);
+ tls_free_open_rec(sk);
kfree(ctx);
}
@@ -1142,6 +1676,24 @@ void tls_sw_free_resources_rx(struct sock *sk)
kfree(ctx);
}
+/* The work handler to transmit the encrypted records in tx_list */
+static void tx_work_handler(struct work_struct *work)
+{
+ struct delayed_work *delayed_work = to_delayed_work(work);
+ struct tx_work *tx_work = container_of(delayed_work,
+ struct tx_work, work);
+ struct sock *sk = tx_work->sk;
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+
+ if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+ return;
+
+ lock_sock(sk);
+ tls_tx_records(sk, -1);
+ release_sock(sk);
+}
+
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
{
struct tls_crypto_info *crypto_info;
@@ -1191,6 +1743,9 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
crypto_info = &ctx->crypto_send.info;
cctx = &ctx->tx;
aead = &sw_ctx_tx->aead_send;
+ INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
+ INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
+ sw_ctx_tx->tx_work.sk = sk;
} else {
crypto_init_wait(&sw_ctx_rx->async_wait);
crypto_info = &ctx->crypto_recv.info;
@@ -1241,26 +1796,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
goto free_iv;
}
- if (sw_ctx_tx) {
- sg_init_table(sw_ctx_tx->sg_encrypted_data,
- ARRAY_SIZE(sw_ctx_tx->sg_encrypted_data));
- sg_init_table(sw_ctx_tx->sg_plaintext_data,
- ARRAY_SIZE(sw_ctx_tx->sg_plaintext_data));
-
- sg_init_table(sw_ctx_tx->sg_aead_in, 2);
- sg_set_buf(&sw_ctx_tx->sg_aead_in[0], sw_ctx_tx->aad_space,
- sizeof(sw_ctx_tx->aad_space));
- sg_unmark_end(&sw_ctx_tx->sg_aead_in[1]);
- sg_chain(sw_ctx_tx->sg_aead_in, 2,
- sw_ctx_tx->sg_plaintext_data);
- sg_init_table(sw_ctx_tx->sg_aead_out, 2);
- sg_set_buf(&sw_ctx_tx->sg_aead_out[0], sw_ctx_tx->aad_space,
- sizeof(sw_ctx_tx->aad_space));
- sg_unmark_end(&sw_ctx_tx->sg_aead_out[1]);
- sg_chain(sw_ctx_tx->sg_aead_out, 2,
- sw_ctx_tx->sg_encrypted_data);
- }
-
if (!*aead) {
*aead = crypto_alloc_aead("gcm(aes)", 0, 0);
if (IS_ERR(*aead)) {
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a88551f3bc43..5bd01058b9e6 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1019,36 +1019,49 @@ void cfg80211_cqm_config_free(struct wireless_dev *wdev)
wdev->cqm_config = NULL;
}
-void cfg80211_unregister_wdev(struct wireless_dev *wdev)
+static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
ASSERT_RTNL();
- if (WARN_ON(wdev->netdev))
- return;
-
nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
list_del_rcu(&wdev->list);
- synchronize_rcu();
+ if (sync)
+ synchronize_rcu();
rdev->devlist_generation++;
+ cfg80211_mlme_purge_registrations(wdev);
+
switch (wdev->iftype) {
case NL80211_IFTYPE_P2P_DEVICE:
- cfg80211_mlme_purge_registrations(wdev);
cfg80211_stop_p2p_device(rdev, wdev);
break;
case NL80211_IFTYPE_NAN:
cfg80211_stop_nan(rdev, wdev);
break;
default:
- WARN_ON_ONCE(1);
break;
}
+#ifdef CONFIG_CFG80211_WEXT
+ kzfree(wdev->wext.keys);
+#endif
+ /* only initialized if we have a netdev */
+ if (wdev->netdev)
+ flush_work(&wdev->disconnect_wk);
+
cfg80211_cqm_config_free(wdev);
}
+
+void cfg80211_unregister_wdev(struct wireless_dev *wdev)
+{
+ if (WARN_ON(wdev->netdev))
+ return;
+
+ __cfg80211_unregister_wdev(wdev, true);
+}
EXPORT_SYMBOL(cfg80211_unregister_wdev);
static const struct device_type wiphy_type = {
@@ -1153,6 +1166,30 @@ void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev,
}
EXPORT_SYMBOL(cfg80211_stop_iface);
+void cfg80211_init_wdev(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
+{
+ mutex_init(&wdev->mtx);
+ INIT_LIST_HEAD(&wdev->event_list);
+ spin_lock_init(&wdev->event_lock);
+ INIT_LIST_HEAD(&wdev->mgmt_registrations);
+ spin_lock_init(&wdev->mgmt_registrations_lock);
+
+ /*
+ * We get here also when the interface changes network namespaces,
+ * as it's registered into the new one, but we don't want it to
+ * change ID in that case. Checking if the ID is already assigned
+ * works, because 0 isn't considered a valid ID and the memory is
+ * 0-initialized.
+ */
+ if (!wdev->identifier)
+ wdev->identifier = ++rdev->wdev_id;
+ list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list);
+ rdev->devlist_generation++;
+
+ nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
+}
+
static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
unsigned long state, void *ptr)
{
@@ -1178,23 +1215,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
* called within code protected by it when interfaces
* are added with nl80211.
*/
- mutex_init(&wdev->mtx);
- INIT_LIST_HEAD(&wdev->event_list);
- spin_lock_init(&wdev->event_lock);
- INIT_LIST_HEAD(&wdev->mgmt_registrations);
- spin_lock_init(&wdev->mgmt_registrations_lock);
-
- /*
- * We get here also when the interface changes network namespaces,
- * as it's registered into the new one, but we don't want it to
- * change ID in that case. Checking if the ID is already assigned
- * works, because 0 isn't considered a valid ID and the memory is
- * 0-initialized.
- */
- if (!wdev->identifier)
- wdev->identifier = ++rdev->wdev_id;
- list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list);
- rdev->devlist_generation++;
/* can only change netns with wiphy */
dev->features |= NETIF_F_NETNS_LOCAL;
@@ -1223,7 +1243,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
INIT_WORK(&wdev->disconnect_wk, cfg80211_autodisconnect_wk);
- nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
+ cfg80211_init_wdev(rdev, wdev);
break;
case NETDEV_GOING_DOWN:
cfg80211_leave(rdev, wdev);
@@ -1238,7 +1258,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
list_for_each_entry_safe(pos, tmp,
&rdev->sched_scan_req_list, list) {
- if (WARN_ON(pos && pos->dev == wdev->netdev))
+ if (WARN_ON(pos->dev == wdev->netdev))
cfg80211_stop_sched_scan_req(rdev, pos, false);
}
@@ -1302,17 +1322,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
* remove and clean it up.
*/
if (!list_empty(&wdev->list)) {
- nl80211_notify_iface(rdev, wdev,
- NL80211_CMD_DEL_INTERFACE);
+ __cfg80211_unregister_wdev(wdev, false);
sysfs_remove_link(&dev->dev.kobj, "phy80211");
- list_del_rcu(&wdev->list);
- rdev->devlist_generation++;
- cfg80211_mlme_purge_registrations(wdev);
-#ifdef CONFIG_CFG80211_WEXT
- kzfree(wdev->wext.keys);
-#endif
- flush_work(&wdev->disconnect_wk);
- cfg80211_cqm_config_free(wdev);
}
/*
* synchronise (so that we won't find this netdev
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 7f52ef569320..c61dbba8bf47 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -66,6 +66,7 @@ struct cfg80211_registered_device {
/* protected by RTNL only */
int num_running_ifaces;
int num_running_monitor_ifaces;
+ u64 cookie_counter;
/* BSSes/scanning */
spinlock_t bss_lock;
@@ -133,6 +134,16 @@ cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev)
#endif
}
+static inline u64 cfg80211_assign_cookie(struct cfg80211_registered_device *rdev)
+{ /* Advance rdev->cookie_counter and return the new value; 0 is never returned. */
+ u64 r = ++rdev->cookie_counter; /* pre-increment: the stored counter moves too */
+
+ if (WARN_ON(r == 0)) /* the u64 counter wrapped around to 0 */
+ r = ++rdev->cookie_counter; /* step past 0 and hand out the next value */
+
+ return r;
+}
+
extern struct workqueue_struct *cfg80211_wq;
extern struct list_head cfg80211_rdev_list;
extern int cfg80211_rdev_list_generation;
@@ -187,6 +198,9 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx);
int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
struct net *net);
+void cfg80211_init_wdev(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev);
+
static inline void wdev_lock(struct wireless_dev *wdev)
__acquires(wdev)
{
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index e6bce1f130c9..b5e235573c8a 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -30,7 +30,7 @@
#include <net/iw_handler.h>
#include <crypto/hash.h>
-#include <crypto/skcipher.h>
+#include <linux/crypto.h>
#include <linux/crc32.h>
#include <net/lib80211.h>
@@ -64,9 +64,9 @@ struct lib80211_tkip_data {
int key_idx;
- struct crypto_skcipher *rx_tfm_arc4;
+ struct crypto_cipher *rx_tfm_arc4;
struct crypto_shash *rx_tfm_michael;
- struct crypto_skcipher *tx_tfm_arc4;
+ struct crypto_cipher *tx_tfm_arc4;
struct crypto_shash *tx_tfm_michael;
/* scratch buffers for virt_to_page() (crypto API) */
@@ -99,8 +99,7 @@ static void *lib80211_tkip_init(int key_idx)
priv->key_idx = key_idx;
- priv->tx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
- CRYPTO_ALG_ASYNC);
+ priv->tx_tfm_arc4 = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->tx_tfm_arc4)) {
priv->tx_tfm_arc4 = NULL;
goto fail;
@@ -112,8 +111,7 @@ static void *lib80211_tkip_init(int key_idx)
goto fail;
}
- priv->rx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
- CRYPTO_ALG_ASYNC);
+ priv->rx_tfm_arc4 = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->rx_tfm_arc4)) {
priv->rx_tfm_arc4 = NULL;
goto fail;
@@ -130,9 +128,9 @@ static void *lib80211_tkip_init(int key_idx)
fail:
if (priv) {
crypto_free_shash(priv->tx_tfm_michael);
- crypto_free_skcipher(priv->tx_tfm_arc4);
+ crypto_free_cipher(priv->tx_tfm_arc4);
crypto_free_shash(priv->rx_tfm_michael);
- crypto_free_skcipher(priv->rx_tfm_arc4);
+ crypto_free_cipher(priv->rx_tfm_arc4);
kfree(priv);
}
@@ -144,9 +142,9 @@ static void lib80211_tkip_deinit(void *priv)
struct lib80211_tkip_data *_priv = priv;
if (_priv) {
crypto_free_shash(_priv->tx_tfm_michael);
- crypto_free_skcipher(_priv->tx_tfm_arc4);
+ crypto_free_cipher(_priv->tx_tfm_arc4);
crypto_free_shash(_priv->rx_tfm_michael);
- crypto_free_skcipher(_priv->rx_tfm_arc4);
+ crypto_free_cipher(_priv->rx_tfm_arc4);
}
kfree(priv);
}
@@ -344,12 +342,10 @@ static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len,
static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct lib80211_tkip_data *tkey = priv;
- SKCIPHER_REQUEST_ON_STACK(req, tkey->tx_tfm_arc4);
int len;
u8 rc4key[16], *pos, *icv;
u32 crc;
- struct scatterlist sg;
- int err;
+ int i;
if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
@@ -374,14 +370,10 @@ static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
icv[2] = crc >> 16;
icv[3] = crc >> 24;
- crypto_skcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
- sg_init_one(&sg, pos, len + 4);
- skcipher_request_set_tfm(req, tkey->tx_tfm_arc4);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
- err = crypto_skcipher_encrypt(req);
- skcipher_request_zero(req);
- return err;
+ crypto_cipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
+ for (i = 0; i < len + 4; i++)
+ crypto_cipher_encrypt_one(tkey->tx_tfm_arc4, pos + i, pos + i);
+ return 0;
}
/*
@@ -400,7 +392,6 @@ static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n,
static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct lib80211_tkip_data *tkey = priv;
- SKCIPHER_REQUEST_ON_STACK(req, tkey->rx_tfm_arc4);
u8 rc4key[16];
u8 keyidx, *pos;
u32 iv32;
@@ -408,9 +399,8 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
struct ieee80211_hdr *hdr;
u8 icv[4];
u32 crc;
- struct scatterlist sg;
int plen;
- int err;
+ int i;
hdr = (struct ieee80211_hdr *)skb->data;
@@ -463,18 +453,9 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
plen = skb->len - hdr_len - 12;
- crypto_skcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
- sg_init_one(&sg, pos, plen + 4);
- skcipher_request_set_tfm(req, tkey->rx_tfm_arc4);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
- err = crypto_skcipher_decrypt(req);
- skcipher_request_zero(req);
- if (err) {
- net_dbg_ratelimited("TKIP: failed to decrypt received packet from %pM\n",
- hdr->addr2);
- return -7;
- }
+ crypto_cipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
+ for (i = 0; i < plen + 4; i++)
+ crypto_cipher_decrypt_one(tkey->rx_tfm_arc4, pos + i, pos + i);
crc = ~crc32_le(~0, pos, plen);
icv[0] = crc;
@@ -660,9 +641,9 @@ static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
struct lib80211_tkip_data *tkey = priv;
int keyidx;
struct crypto_shash *tfm = tkey->tx_tfm_michael;
- struct crypto_skcipher *tfm2 = tkey->tx_tfm_arc4;
+ struct crypto_cipher *tfm2 = tkey->tx_tfm_arc4;
struct crypto_shash *tfm3 = tkey->rx_tfm_michael;
- struct crypto_skcipher *tfm4 = tkey->rx_tfm_arc4;
+ struct crypto_cipher *tfm4 = tkey->rx_tfm_arc4;
keyidx = tkey->key_idx;
memset(tkey, 0, sizeof(*tkey));
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index d05f58b0fd04..6015f6b542a6 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -22,7 +22,7 @@
#include <net/lib80211.h>
-#include <crypto/skcipher.h>
+#include <linux/crypto.h>
#include <linux/crc32.h>
MODULE_AUTHOR("Jouni Malinen");
@@ -35,8 +35,8 @@ struct lib80211_wep_data {
u8 key[WEP_KEY_LEN + 1];
u8 key_len;
u8 key_idx;
- struct crypto_skcipher *tx_tfm;
- struct crypto_skcipher *rx_tfm;
+ struct crypto_cipher *tx_tfm;
+ struct crypto_cipher *rx_tfm;
};
static void *lib80211_wep_init(int keyidx)
@@ -48,13 +48,13 @@ static void *lib80211_wep_init(int keyidx)
goto fail;
priv->key_idx = keyidx;
- priv->tx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+ priv->tx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->tx_tfm)) {
priv->tx_tfm = NULL;
goto fail;
}
- priv->rx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+ priv->rx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(priv->rx_tfm)) {
priv->rx_tfm = NULL;
goto fail;
@@ -66,8 +66,8 @@ static void *lib80211_wep_init(int keyidx)
fail:
if (priv) {
- crypto_free_skcipher(priv->tx_tfm);
- crypto_free_skcipher(priv->rx_tfm);
+ crypto_free_cipher(priv->tx_tfm);
+ crypto_free_cipher(priv->rx_tfm);
kfree(priv);
}
return NULL;
@@ -77,8 +77,8 @@ static void lib80211_wep_deinit(void *priv)
{
struct lib80211_wep_data *_priv = priv;
if (_priv) {
- crypto_free_skcipher(_priv->tx_tfm);
- crypto_free_skcipher(_priv->rx_tfm);
+ crypto_free_cipher(_priv->tx_tfm);
+ crypto_free_cipher(_priv->rx_tfm);
}
kfree(priv);
}
@@ -129,12 +129,10 @@ static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len,
static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct lib80211_wep_data *wep = priv;
- SKCIPHER_REQUEST_ON_STACK(req, wep->tx_tfm);
u32 crc, klen, len;
u8 *pos, *icv;
- struct scatterlist sg;
u8 key[WEP_KEY_LEN + 3];
- int err;
+ int i;
/* other checks are in lib80211_wep_build_iv */
if (skb_tailroom(skb) < 4)
@@ -162,14 +160,12 @@ static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
icv[2] = crc >> 16;
icv[3] = crc >> 24;
- crypto_skcipher_setkey(wep->tx_tfm, key, klen);
- sg_init_one(&sg, pos, len + 4);
- skcipher_request_set_tfm(req, wep->tx_tfm);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
- err = crypto_skcipher_encrypt(req);
- skcipher_request_zero(req);
- return err;
+ crypto_cipher_setkey(wep->tx_tfm, key, klen);
+
+ for (i = 0; i < len + 4; i++)
+ crypto_cipher_encrypt_one(wep->tx_tfm, pos + i, pos + i);
+
+ return 0;
}
/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
@@ -182,12 +178,10 @@ static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
{
struct lib80211_wep_data *wep = priv;
- SKCIPHER_REQUEST_ON_STACK(req, wep->rx_tfm);
u32 crc, klen, plen;
u8 key[WEP_KEY_LEN + 3];
u8 keyidx, *pos, icv[4];
- struct scatterlist sg;
- int err;
+ int i;
if (skb->len < hdr_len + 8)
return -1;
@@ -208,15 +202,9 @@ static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
/* Apply RC4 to data and compute CRC32 over decrypted data */
plen = skb->len - hdr_len - 8;
- crypto_skcipher_setkey(wep->rx_tfm, key, klen);
- sg_init_one(&sg, pos, plen + 4);
- skcipher_request_set_tfm(req, wep->rx_tfm);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
- err = crypto_skcipher_decrypt(req);
- skcipher_request_zero(req);
- if (err)
- return -7;
+ crypto_cipher_setkey(wep->rx_tfm, key, klen);
+ for (i = 0; i < plen + 4; i++)
+ crypto_cipher_decrypt_one(wep->rx_tfm, pos + i, pos + i);
crc = ~crc32_le(~0, pos, plen);
icv[0] = crc;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 176edfefcbaa..744b5851bbf9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -200,7 +200,46 @@ cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info)
return __cfg80211_rdev_from_attrs(netns, info->attrs);
}
+static int validate_ie_attr(const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{ /* Return 0 if attr's payload is a whole number of (id, len, body) elements, else -EINVAL. */
+ const u8 *pos;
+ int len;
+
+ pos = nla_data(attr);
+ len = nla_len(attr);
+
+ while (len) { /* an empty payload is accepted */
+ u8 elemlen;
+
+ if (len < 2) /* each element needs two header bytes; pos[1] is its length */
+ goto error;
+ len -= 2;
+
+ elemlen = pos[1];
+ if (elemlen > len) /* element body would run past the end of the payload */
+ goto error;
+
+ len -= elemlen;
+ pos += 2 + elemlen;
+ }
+
+ return 0; /* every byte was consumed by complete elements */
+error:
+ NL_SET_ERR_MSG_ATTR(extack, attr, "malformed information elements");
+ return -EINVAL;
+}
+
/* policy for the attributes */
+static const struct nla_policy
+nl80211_ftm_responder_policy[NL80211_FTM_RESP_ATTR_MAX + 1] = { /* nested policy referenced by the NL80211_ATTR_FTM_RESPONDER entry */
+ [NL80211_FTM_RESP_ATTR_ENABLED] = { .type = NLA_FLAG, },
+ [NL80211_FTM_RESP_ATTR_LCI] = { .type = NLA_BINARY,
+ .len = U8_MAX },
+ [NL80211_FTM_RESP_ATTR_CIVICLOC] = { .type = NLA_BINARY,
+ .len = U8_MAX },
+};
+
static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
[NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING,
@@ -213,14 +252,14 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 },
[NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 },
- [NL80211_ATTR_WIPHY_RETRY_SHORT] = { .type = NLA_U8 },
- [NL80211_ATTR_WIPHY_RETRY_LONG] = { .type = NLA_U8 },
+ [NL80211_ATTR_WIPHY_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1),
+ [NL80211_ATTR_WIPHY_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1),
[NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 },
[NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 },
[NL80211_ATTR_WIPHY_COVERAGE_CLASS] = { .type = NLA_U8 },
[NL80211_ATTR_WIPHY_DYN_ACK] = { .type = NLA_FLAG },
- [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
+ [NL80211_ATTR_IFTYPE] = NLA_POLICY_MAX(NLA_U32, NL80211_IFTYPE_MAX),
[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
[NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 },
@@ -230,24 +269,28 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_KEY] = { .type = NLA_NESTED, },
[NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY,
.len = WLAN_MAX_KEY_LEN },
- [NL80211_ATTR_KEY_IDX] = { .type = NLA_U8 },
+ [NL80211_ATTR_KEY_IDX] = NLA_POLICY_MAX(NLA_U8, 5),
[NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 },
[NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG },
[NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 },
- [NL80211_ATTR_KEY_TYPE] = { .type = NLA_U32 },
+ [NL80211_ATTR_KEY_TYPE] = /* max is inclusive; NUM_NL80211_KEYTYPES itself is not a valid type */
+ NLA_POLICY_MAX(NLA_U32, NUM_NL80211_KEYTYPES - 1),
[NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 },
[NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 },
[NL80211_ATTR_BEACON_HEAD] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
- [NL80211_ATTR_BEACON_TAIL] = { .type = NLA_BINARY,
- .len = IEEE80211_MAX_DATA_LEN },
- [NL80211_ATTR_STA_AID] = { .type = NLA_U16 },
+ [NL80211_ATTR_BEACON_TAIL] =
+ NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
+ IEEE80211_MAX_DATA_LEN),
+ [NL80211_ATTR_STA_AID] =
+ NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID),
[NL80211_ATTR_STA_FLAGS] = { .type = NLA_NESTED },
[NL80211_ATTR_STA_LISTEN_INTERVAL] = { .type = NLA_U16 },
[NL80211_ATTR_STA_SUPPORTED_RATES] = { .type = NLA_BINARY,
.len = NL80211_MAX_SUPP_RATES },
- [NL80211_ATTR_STA_PLINK_ACTION] = { .type = NLA_U8 },
+ [NL80211_ATTR_STA_PLINK_ACTION] =
+ NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_ACTIONS - 1),
[NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 },
[NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ },
[NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY,
@@ -270,8 +313,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_HT_CAPABILITY] = { .len = NL80211_HT_CAPABILITY_LEN },
[NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 },
- [NL80211_ATTR_IE] = { .type = NLA_BINARY,
- .len = IEEE80211_MAX_DATA_LEN },
+ [NL80211_ATTR_IE] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
+ validate_ie_attr,
+ IEEE80211_MAX_DATA_LEN),
[NL80211_ATTR_SCAN_FREQUENCIES] = { .type = NLA_NESTED },
[NL80211_ATTR_SCAN_SSIDS] = { .type = NLA_NESTED },
@@ -281,7 +325,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 },
[NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG },
[NL80211_ATTR_TIMED_OUT] = { .type = NLA_FLAG },
- [NL80211_ATTR_USE_MFP] = { .type = NLA_U32 },
+ [NL80211_ATTR_USE_MFP] = NLA_POLICY_RANGE(NLA_U32,
+ NL80211_MFP_NO,
+ NL80211_MFP_OPTIONAL),
[NL80211_ATTR_STA_FLAGS2] = {
.len = sizeof(struct nl80211_sta_flag_update),
},
@@ -301,7 +347,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_FRAME] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
[NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, },
- [NL80211_ATTR_PS_STATE] = { .type = NLA_U32 },
+ [NL80211_ATTR_PS_STATE] = NLA_POLICY_RANGE(NLA_U32,
+ NL80211_PS_DISABLED,
+ NL80211_PS_ENABLED),
[NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
[NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG },
[NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 },
@@ -314,15 +362,23 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG },
[NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED },
[NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED },
- [NL80211_ATTR_STA_PLINK_STATE] = { .type = NLA_U8 },
+ [NL80211_ATTR_STA_PLINK_STATE] =
+ NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_STATES - 1),
+ [NL80211_ATTR_MESH_PEER_AID] =
+ NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID),
[NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 },
[NL80211_ATTR_REKEY_DATA] = { .type = NLA_NESTED },
[NL80211_ATTR_SCAN_SUPP_RATES] = { .type = NLA_NESTED },
- [NL80211_ATTR_HIDDEN_SSID] = { .type = NLA_U32 },
- [NL80211_ATTR_IE_PROBE_RESP] = { .type = NLA_BINARY,
- .len = IEEE80211_MAX_DATA_LEN },
- [NL80211_ATTR_IE_ASSOC_RESP] = { .type = NLA_BINARY,
- .len = IEEE80211_MAX_DATA_LEN },
+ [NL80211_ATTR_HIDDEN_SSID] =
+ NLA_POLICY_RANGE(NLA_U32,
+ NL80211_HIDDEN_SSID_NOT_IN_USE,
+ NL80211_HIDDEN_SSID_ZERO_CONTENTS),
+ [NL80211_ATTR_IE_PROBE_RESP] =
+ NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
+ IEEE80211_MAX_DATA_LEN),
+ [NL80211_ATTR_IE_ASSOC_RESP] =
+ NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
+ IEEE80211_MAX_DATA_LEN),
[NL80211_ATTR_ROAM_SUPPORT] = { .type = NLA_FLAG },
[NL80211_ATTR_SCHED_SCAN_MATCH] = { .type = NLA_NESTED },
[NL80211_ATTR_TX_NO_CCK_RATE] = { .type = NLA_FLAG },
@@ -348,9 +404,12 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_AUTH_DATA] = { .type = NLA_BINARY, },
[NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN },
[NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 },
- [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 },
- [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 },
- [NL80211_ATTR_LOCAL_MESH_POWER_MODE] = {. type = NLA_U32 },
+ [NL80211_ATTR_P2P_CTWINDOW] = NLA_POLICY_MAX(NLA_U8, 127),
+ [NL80211_ATTR_P2P_OPPPS] = NLA_POLICY_MAX(NLA_U8, 1),
+ [NL80211_ATTR_LOCAL_MESH_POWER_MODE] =
+ NLA_POLICY_RANGE(NLA_U32,
+ NL80211_MESH_POWER_UNKNOWN + 1,
+ NL80211_MESH_POWER_MAX),
[NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 },
[NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED },
[NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 },
@@ -363,7 +422,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MDID] = { .type = NLA_U16 },
[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
- [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 },
+ [NL80211_ATTR_PEER_AID] =
+ NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID),
[NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
[NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
[NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED },
@@ -384,8 +444,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_SOCKET_OWNER] = { .type = NLA_FLAG },
[NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY },
[NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG },
- [NL80211_ATTR_TSID] = { .type = NLA_U8 },
- [NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 },
+ [NL80211_ATTR_TSID] = NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_TIDS - 1),
+ [NL80211_ATTR_USER_PRIO] =
+ NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_UPS - 1),
[NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
[NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 },
[NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN },
@@ -395,12 +456,13 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG },
[NL80211_ATTR_PBSS] = { .type = NLA_FLAG },
[NL80211_ATTR_BSS_SELECT] = { .type = NLA_NESTED },
- [NL80211_ATTR_STA_SUPPORT_P2P_PS] = { .type = NLA_U8 },
+ [NL80211_ATTR_STA_SUPPORT_P2P_PS] =
+ NLA_POLICY_MAX(NLA_U8, NUM_NL80211_P2P_PS_STATUS - 1),
[NL80211_ATTR_MU_MIMO_GROUP_DATA] = {
.len = VHT_MUMIMO_GROUPS_DATA_LEN
},
[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN },
- [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 },
+ [NL80211_ATTR_NAN_MASTER_PREF] = NLA_POLICY_MIN(NLA_U8, 1),
[NL80211_ATTR_BANDS] = { .type = NLA_U32 },
[NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED },
[NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY,
@@ -430,6 +492,11 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
[NL80211_ATTR_HE_CAPABILITY] = { .type = NLA_BINARY,
.len = NL80211_HE_MAX_CAPABILITY_LEN },
+
+ [NL80211_ATTR_FTM_RESPONDER] = {
+ .type = NLA_NESTED,
+ .validation_data = nl80211_ftm_responder_policy,
+ },
};
/* policy for the key attributes */
@@ -440,7 +507,7 @@ static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = {
[NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 },
[NL80211_KEY_DEFAULT] = { .type = NLA_FLAG },
[NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG },
- [NL80211_KEY_TYPE] = { .type = NLA_U32 },
+ [NL80211_KEY_TYPE] = NLA_POLICY_MAX(NLA_U32, NUM_NL80211_KEYTYPES - 1),
[NL80211_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED },
};
@@ -491,7 +558,10 @@ nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
static const struct nla_policy
nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = {
[NL80211_ATTR_COALESCE_RULE_DELAY] = { .type = NLA_U32 },
- [NL80211_ATTR_COALESCE_RULE_CONDITION] = { .type = NLA_U32 },
+ [NL80211_ATTR_COALESCE_RULE_CONDITION] =
+ NLA_POLICY_RANGE(NLA_U32,
+ NL80211_COALESCE_CONDITION_MATCH,
+ NL80211_COALESCE_CONDITION_NO_MATCH),
[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN] = { .type = NLA_NESTED },
};
@@ -567,8 +637,7 @@ nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = {
[NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 },
};
-static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
- struct netlink_callback *cb,
+static int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
struct cfg80211_registered_device **rdev,
struct wireless_dev **wdev)
{
@@ -582,7 +651,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
return err;
*wdev = __cfg80211_wdev_from_attrs(
- sock_net(skb->sk),
+ sock_net(cb->skb->sk),
genl_family_attrbuf(&nl80211_fam));
if (IS_ERR(*wdev))
return PTR_ERR(*wdev);
@@ -614,36 +683,6 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
return 0;
}
-/* IE validation */
-static bool is_valid_ie_attr(const struct nlattr *attr)
-{
- const u8 *pos;
- int len;
-
- if (!attr)
- return true;
-
- pos = nla_data(attr);
- len = nla_len(attr);
-
- while (len) {
- u8 elemlen;
-
- if (len < 2)
- return false;
- len -= 2;
-
- elemlen = pos[1];
- if (elemlen > len)
- return false;
-
- len -= elemlen;
- pos += 2 + elemlen;
- }
-
- return true;
-}
-
/* message building helper */
static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
int flags, u8 cmd)
@@ -858,12 +897,8 @@ static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key,
if (tb[NL80211_KEY_CIPHER])
k->p.cipher = nla_get_u32(tb[NL80211_KEY_CIPHER]);
- if (tb[NL80211_KEY_TYPE]) {
+ if (tb[NL80211_KEY_TYPE])
k->type = nla_get_u32(tb[NL80211_KEY_TYPE]);
- if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES)
- return genl_err_attr(info, -EINVAL,
- tb[NL80211_KEY_TYPE]);
- }
if (tb[NL80211_KEY_DEFAULT_TYPES]) {
struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
@@ -910,13 +945,8 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k)
if (k->defmgmt)
k->def_multi = true;
- if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
+ if (info->attrs[NL80211_ATTR_KEY_TYPE])
k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
- if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) {
- GENL_SET_ERR_MSG(info, "key type out of range");
- return -EINVAL;
- }
- }
if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) {
struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
@@ -2292,12 +2322,14 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
struct genl_info *info,
struct cfg80211_chan_def *chandef)
{
+ struct netlink_ext_ack *extack = info->extack;
+ struct nlattr **attrs = info->attrs;
u32 control_freq;
- if (!info->attrs[NL80211_ATTR_WIPHY_FREQ])
+ if (!attrs[NL80211_ATTR_WIPHY_FREQ])
return -EINVAL;
- control_freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+ control_freq = nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ]);
chandef->chan = ieee80211_get_channel(&rdev->wiphy, control_freq);
chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
@@ -2305,14 +2337,16 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
chandef->center_freq2 = 0;
/* Primary channel not allowed */
- if (!chandef->chan || chandef->chan->flags & IEEE80211_CHAN_DISABLED)
+ if (!chandef->chan || chandef->chan->flags & IEEE80211_CHAN_DISABLED) {
+ NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ],
+ "Channel is disabled");
return -EINVAL;
+ }
- if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
+ if (attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
enum nl80211_channel_type chantype;
- chantype = nla_get_u32(
- info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
+ chantype = nla_get_u32(attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
switch (chantype) {
case NL80211_CHAN_NO_HT:
@@ -2322,42 +2356,56 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
cfg80211_chandef_create(chandef, chandef->chan,
chantype);
/* user input for center_freq is incorrect */
- if (info->attrs[NL80211_ATTR_CENTER_FREQ1] &&
- chandef->center_freq1 != nla_get_u32(
- info->attrs[NL80211_ATTR_CENTER_FREQ1]))
+ if (attrs[NL80211_ATTR_CENTER_FREQ1] &&
+ chandef->center_freq1 != nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1])) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ attrs[NL80211_ATTR_CENTER_FREQ1],
+ "bad center frequency 1");
return -EINVAL;
+ }
/* center_freq2 must be zero */
- if (info->attrs[NL80211_ATTR_CENTER_FREQ2] &&
- nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2]))
+ if (attrs[NL80211_ATTR_CENTER_FREQ2] &&
+ nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2])) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ attrs[NL80211_ATTR_CENTER_FREQ2],
+ "center frequency 2 can't be used");
return -EINVAL;
+ }
break;
default:
+ NL_SET_ERR_MSG_ATTR(extack,
+ attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE],
+ "invalid channel type");
return -EINVAL;
}
- } else if (info->attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
+ } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
chandef->width =
- nla_get_u32(info->attrs[NL80211_ATTR_CHANNEL_WIDTH]);
- if (info->attrs[NL80211_ATTR_CENTER_FREQ1])
+ nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]);
+ if (attrs[NL80211_ATTR_CENTER_FREQ1])
chandef->center_freq1 =
- nla_get_u32(
- info->attrs[NL80211_ATTR_CENTER_FREQ1]);
- if (info->attrs[NL80211_ATTR_CENTER_FREQ2])
+ nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
+ if (attrs[NL80211_ATTR_CENTER_FREQ2])
chandef->center_freq2 =
- nla_get_u32(
- info->attrs[NL80211_ATTR_CENTER_FREQ2]);
+ nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2]);
}
- if (!cfg80211_chandef_valid(chandef))
+ if (!cfg80211_chandef_valid(chandef)) {
+ NL_SET_ERR_MSG(extack, "invalid channel definition");
return -EINVAL;
+ }
if (!cfg80211_chandef_usable(&rdev->wiphy, chandef,
- IEEE80211_CHAN_DISABLED))
+ IEEE80211_CHAN_DISABLED)) {
+ NL_SET_ERR_MSG(extack, "(extension) channel is disabled");
return -EINVAL;
+ }
if ((chandef->width == NL80211_CHAN_WIDTH_5 ||
chandef->width == NL80211_CHAN_WIDTH_10) &&
- !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ))
+ !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ)) {
+ NL_SET_ERR_MSG(extack, "5/10 MHz not supported");
return -EINVAL;
+ }
return 0;
}
@@ -2617,8 +2665,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) {
retry_short = nla_get_u8(
info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]);
- if (retry_short == 0)
- return -EINVAL;
changed |= WIPHY_PARAM_RETRY_SHORT;
}
@@ -2626,8 +2672,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]) {
retry_long = nla_get_u8(
info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]);
- if (retry_long == 0)
- return -EINVAL;
changed |= WIPHY_PARAM_RETRY_LONG;
}
@@ -3119,8 +3163,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]);
if (otype != ntype)
change = true;
- if (ntype > NL80211_IFTYPE_MAX)
- return -EINVAL;
}
if (info->attrs[NL80211_ATTR_MESH_ID]) {
@@ -3185,11 +3227,8 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NL80211_ATTR_IFNAME])
return -EINVAL;
- if (info->attrs[NL80211_ATTR_IFTYPE]) {
+ if (info->attrs[NL80211_ATTR_IFTYPE])
type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]);
- if (type > NL80211_IFTYPE_MAX)
- return -EINVAL;
- }
if (!rdev->ops->add_virtual_intf ||
!(rdev->wiphy.interface_modes & (1 << type)))
@@ -3252,15 +3291,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
* P2P Device and NAN do not have a netdev, so don't go
* through the netdev notifier and must be added here
*/
- mutex_init(&wdev->mtx);
- INIT_LIST_HEAD(&wdev->event_list);
- spin_lock_init(&wdev->event_lock);
- INIT_LIST_HEAD(&wdev->mgmt_registrations);
- spin_lock_init(&wdev->mgmt_registrations_lock);
-
- wdev->identifier = ++rdev->wdev_id;
- list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list);
- rdev->devlist_generation++;
+ cfg80211_init_wdev(rdev, wdev);
break;
default:
break;
@@ -3272,15 +3303,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
return -ENOBUFS;
}
- /*
- * For wdevs which have no associated netdev object (e.g. of type
- * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here.
- * For all other types, the event will be generated from the
- * netdev notifier
- */
- if (!wdev->netdev)
- nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
-
return genlmsg_reply(msg, info);
}
@@ -3359,7 +3381,7 @@ static void get_key_callback(void *c, struct key_params *params)
params->cipher)))
goto nla_put_failure;
- if (nla_put_u8(cookie->msg, NL80211_ATTR_KEY_IDX, cookie->idx))
+ if (nla_put_u8(cookie->msg, NL80211_KEY_IDX, cookie->idx))
goto nla_put_failure;
nla_nest_end(cookie->msg, key);
@@ -3386,9 +3408,6 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_KEY_IDX])
key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
- if (key_idx > 5)
- return -EINVAL;
-
if (info->attrs[NL80211_ATTR_MAC])
mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
@@ -3396,8 +3415,6 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
u32 kt = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
- if (kt >= NUM_NL80211_KEYTYPES)
- return -EINVAL;
if (kt != NL80211_KEYTYPE_GROUP &&
kt != NL80211_KEYTYPE_PAIRWISE)
return -EINVAL;
@@ -3998,16 +4015,12 @@ static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev,
return 0;
}
-static int nl80211_parse_beacon(struct nlattr *attrs[],
+static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev,
+ struct nlattr *attrs[],
struct cfg80211_beacon_data *bcn)
{
bool haveinfo = false;
-
- if (!is_valid_ie_attr(attrs[NL80211_ATTR_BEACON_TAIL]) ||
- !is_valid_ie_attr(attrs[NL80211_ATTR_IE]) ||
- !is_valid_ie_attr(attrs[NL80211_ATTR_IE_PROBE_RESP]) ||
- !is_valid_ie_attr(attrs[NL80211_ATTR_IE_ASSOC_RESP]))
- return -EINVAL;
+ int err;
memset(bcn, 0, sizeof(*bcn));
@@ -4052,6 +4065,35 @@ static int nl80211_parse_beacon(struct nlattr *attrs[],
bcn->probe_resp_len = nla_len(attrs[NL80211_ATTR_PROBE_RESP]);
}
+ if (attrs[NL80211_ATTR_FTM_RESPONDER]) {
+ struct nlattr *tb[NL80211_FTM_RESP_ATTR_MAX + 1];
+
+ err = nla_parse_nested(tb, NL80211_FTM_RESP_ATTR_MAX,
+ attrs[NL80211_ATTR_FTM_RESPONDER],
+ NULL, NULL);
+ if (err)
+ return err;
+
+ if (tb[NL80211_FTM_RESP_ATTR_ENABLED] &&
+ wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER))
+ bcn->ftm_responder = 1;
+ else
+ return -EOPNOTSUPP;
+
+ if (tb[NL80211_FTM_RESP_ATTR_LCI]) {
+ bcn->lci = nla_data(tb[NL80211_FTM_RESP_ATTR_LCI]);
+ bcn->lci_len = nla_len(tb[NL80211_FTM_RESP_ATTR_LCI]);
+ }
+
+ if (tb[NL80211_FTM_RESP_ATTR_CIVICLOC]) {
+ bcn->civicloc = nla_data(tb[NL80211_FTM_RESP_ATTR_CIVICLOC]);
+ bcn->civicloc_len = nla_len(tb[NL80211_FTM_RESP_ATTR_CIVICLOC]);
+ }
+ } else {
+ bcn->ftm_responder = -1;
+ }
+
return 0;
}
@@ -4096,6 +4138,9 @@ static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params)
cap = cfg80211_find_ie(WLAN_EID_VHT_CAPABILITY, ies, ies_len);
if (cap && cap[1] >= sizeof(*params->vht_cap))
params->vht_cap = (void *)(cap + 2);
+ cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len);
+ if (cap && cap[1] >= sizeof(*params->he_cap) + 1)
+ params->he_cap = (void *)(cap + 3);
}
static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev,
@@ -4195,7 +4240,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
!info->attrs[NL80211_ATTR_BEACON_HEAD])
return -EINVAL;
- err = nl80211_parse_beacon(info->attrs, &params.beacon);
+ err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon);
if (err)
return err;
@@ -4225,14 +4270,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- if (info->attrs[NL80211_ATTR_HIDDEN_SSID]) {
+ if (info->attrs[NL80211_ATTR_HIDDEN_SSID])
params.hidden_ssid = nla_get_u32(
info->attrs[NL80211_ATTR_HIDDEN_SSID]);
- if (params.hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE &&
- params.hidden_ssid != NL80211_HIDDEN_SSID_ZERO_LEN &&
- params.hidden_ssid != NL80211_HIDDEN_SSID_ZERO_CONTENTS)
- return -EINVAL;
- }
params.privacy = !!info->attrs[NL80211_ATTR_PRIVACY];
@@ -4262,8 +4302,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
params.p2p_ctwindow =
nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]);
- if (params.p2p_ctwindow > 127)
- return -EINVAL;
if (params.p2p_ctwindow != 0 &&
!(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN))
return -EINVAL;
@@ -4275,8 +4313,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
return -EINVAL;
tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]);
- if (tmp > 1)
- return -EINVAL;
params.p2p_opp_ps = tmp;
if (params.p2p_opp_ps != 0 &&
!(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS))
@@ -4379,7 +4415,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info)
if (!wdev->beacon_interval)
return -EINVAL;
- err = nl80211_parse_beacon(info->attrs, &params);
+ err = nl80211_parse_beacon(rdev, info->attrs, &params);
if (err)
return err;
@@ -4725,10 +4761,13 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc);
PUT_SINFO_U64(BEACON_RX, rx_beacon);
PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
- PUT_SINFO(ACK_SIGNAL, ack_signal, u8);
+ PUT_SINFO(RX_MPDUS, rx_mpdu_count, u32);
+ PUT_SINFO(FCS_ERROR_COUNT, fcs_err_count, u32);
if (wiphy_ext_feature_isset(&rdev->wiphy,
- NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT))
- PUT_SINFO(DATA_ACK_SIGNAL_AVG, avg_ack_signal, s8);
+ NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT)) {
+ PUT_SINFO(ACK_SIGNAL, ack_signal, u8);
+ PUT_SINFO(ACK_SIGNAL_AVG, avg_ack_signal, s8);
+ }
#undef PUT_SINFO
#undef PUT_SINFO_U64
@@ -4807,7 +4846,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
int err;
rtnl_lock();
- err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
+ err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
if (err)
goto out_err;
@@ -5212,17 +5251,11 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
else
params.listen_interval = -1;
- if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) {
- u8 tmp;
-
- tmp = nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]);
- if (tmp >= NUM_NL80211_P2P_PS_STATUS)
- return -EINVAL;
-
- params.support_p2p_ps = tmp;
- } else {
+ if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS])
+ params.support_p2p_ps =
+ nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]);
+ else
params.support_p2p_ps = -1;
- }
if (!info->attrs[NL80211_ATTR_MAC])
return -EINVAL;
@@ -5252,38 +5285,23 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
return -EINVAL;
- if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
+ if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
params.plink_action =
nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
- if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
- return -EINVAL;
- }
if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) {
params.plink_state =
nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
- if (params.plink_state >= NUM_NL80211_PLINK_STATES)
- return -EINVAL;
- if (info->attrs[NL80211_ATTR_MESH_PEER_AID]) {
+ if (info->attrs[NL80211_ATTR_MESH_PEER_AID])
params.peer_aid = nla_get_u16(
info->attrs[NL80211_ATTR_MESH_PEER_AID]);
- if (params.peer_aid > IEEE80211_MAX_AID)
- return -EINVAL;
- }
params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE;
}
- if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) {
- enum nl80211_mesh_power_mode pm = nla_get_u32(
+ if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE])
+ params.local_pm = nla_get_u32(
info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]);
- if (pm <= NL80211_MESH_POWER_UNKNOWN ||
- pm > NL80211_MESH_POWER_MAX)
- return -EINVAL;
-
- params.local_pm = pm;
- }
-
if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
params.opmode_notif_used = true;
params.opmode_notif =
@@ -5360,13 +5378,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) {
- u8 tmp;
-
- tmp = nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]);
- if (tmp >= NUM_NL80211_P2P_PS_STATUS)
- return -EINVAL;
-
- params.support_p2p_ps = tmp;
+ params.support_p2p_ps =
+ nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]);
} else {
/*
* if not specified, assume it's supported for P2P GO interface,
@@ -5380,8 +5393,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]);
else
params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]);
- if (!params.aid || params.aid > IEEE80211_MAX_AID)
- return -EINVAL;
if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) {
params.capability =
@@ -5421,12 +5432,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
}
- if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
+ if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
params.plink_action =
nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
- if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
- return -EINVAL;
- }
err = nl80211_parse_sta_channel_info(info, &params);
if (err)
@@ -5658,7 +5666,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
int err;
rtnl_lock();
- err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
+ err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
if (err)
goto out_err;
@@ -5854,7 +5862,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
int err;
rtnl_lock();
- err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
+ err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
if (err)
goto out_err;
@@ -5936,9 +5944,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
return -EINVAL;
params.p2p_ctwindow =
- nla_get_s8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]);
- if (params.p2p_ctwindow < 0)
- return -EINVAL;
+ nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]);
if (params.p2p_ctwindow != 0 &&
!(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN))
return -EINVAL;
@@ -5950,8 +5956,6 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
return -EINVAL;
tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]);
- if (tmp > 1)
- return -EINVAL;
params.p2p_opp_ps = tmp;
if (params.p2p_opp_ps &&
!(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS))
@@ -6130,33 +6134,49 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
return -ENOBUFS;
}
-static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = {
- [NL80211_MESHCONF_RETRY_TIMEOUT] = { .type = NLA_U16 },
- [NL80211_MESHCONF_CONFIRM_TIMEOUT] = { .type = NLA_U16 },
- [NL80211_MESHCONF_HOLDING_TIMEOUT] = { .type = NLA_U16 },
- [NL80211_MESHCONF_MAX_PEER_LINKS] = { .type = NLA_U16 },
- [NL80211_MESHCONF_MAX_RETRIES] = { .type = NLA_U8 },
- [NL80211_MESHCONF_TTL] = { .type = NLA_U8 },
- [NL80211_MESHCONF_ELEMENT_TTL] = { .type = NLA_U8 },
- [NL80211_MESHCONF_AUTO_OPEN_PLINKS] = { .type = NLA_U8 },
- [NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR] = { .type = NLA_U32 },
+static const struct nla_policy
+nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = {
+ [NL80211_MESHCONF_RETRY_TIMEOUT] =
+ NLA_POLICY_RANGE(NLA_U16, 1, 255),
+ [NL80211_MESHCONF_CONFIRM_TIMEOUT] =
+ NLA_POLICY_RANGE(NLA_U16, 1, 255),
+ [NL80211_MESHCONF_HOLDING_TIMEOUT] =
+ NLA_POLICY_RANGE(NLA_U16, 1, 255),
+ [NL80211_MESHCONF_MAX_PEER_LINKS] =
+ NLA_POLICY_RANGE(NLA_U16, 0, 255),
+ [NL80211_MESHCONF_MAX_RETRIES] = NLA_POLICY_MAX(NLA_U8, 16),
+ [NL80211_MESHCONF_TTL] = NLA_POLICY_MIN(NLA_U8, 1),
+ [NL80211_MESHCONF_ELEMENT_TTL] = NLA_POLICY_MIN(NLA_U8, 1),
+ [NL80211_MESHCONF_AUTO_OPEN_PLINKS] = NLA_POLICY_MAX(NLA_U8, 1),
+ [NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR] =
+ NLA_POLICY_RANGE(NLA_U32, 1, 255),
[NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 },
[NL80211_MESHCONF_PATH_REFRESH_TIME] = { .type = NLA_U32 },
- [NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = { .type = NLA_U16 },
+ [NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = NLA_POLICY_MIN(NLA_U16, 1),
[NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT] = { .type = NLA_U32 },
- [NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] = { .type = NLA_U16 },
- [NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL] = { .type = NLA_U16 },
- [NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 },
- [NL80211_MESHCONF_HWMP_ROOTMODE] = { .type = NLA_U8 },
- [NL80211_MESHCONF_HWMP_RANN_INTERVAL] = { .type = NLA_U16 },
- [NL80211_MESHCONF_GATE_ANNOUNCEMENTS] = { .type = NLA_U8 },
- [NL80211_MESHCONF_FORWARDING] = { .type = NLA_U8 },
- [NL80211_MESHCONF_RSSI_THRESHOLD] = { .type = NLA_U32 },
+ [NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] =
+ NLA_POLICY_MIN(NLA_U16, 1),
+ [NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL] =
+ NLA_POLICY_MIN(NLA_U16, 1),
+ [NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] =
+ NLA_POLICY_MIN(NLA_U16, 1),
+ [NL80211_MESHCONF_HWMP_ROOTMODE] = NLA_POLICY_MAX(NLA_U8, 4),
+ [NL80211_MESHCONF_HWMP_RANN_INTERVAL] =
+ NLA_POLICY_MIN(NLA_U16, 1),
+ [NL80211_MESHCONF_GATE_ANNOUNCEMENTS] = NLA_POLICY_MAX(NLA_U8, 1),
+ [NL80211_MESHCONF_FORWARDING] = NLA_POLICY_MAX(NLA_U8, 1),
+ [NL80211_MESHCONF_RSSI_THRESHOLD] =
+ NLA_POLICY_RANGE(NLA_S32, -255, 0),
[NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16 },
[NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 },
- [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 },
- [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 },
- [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 },
+ [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] =
+ NLA_POLICY_MIN(NLA_U16, 1),
+ [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] =
+ NLA_POLICY_MIN(NLA_U16, 1),
+ [NL80211_MESHCONF_POWER_MODE] =
+ NLA_POLICY_RANGE(NLA_U32,
+ NL80211_MESH_POWER_ACTIVE,
+ NL80211_MESH_POWER_MAX),
[NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 },
[NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 },
};
@@ -6169,68 +6189,12 @@ static const struct nla_policy
[NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG },
[NL80211_MESH_SETUP_AUTH_PROTOCOL] = { .type = NLA_U8 },
[NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG },
- [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY,
- .len = IEEE80211_MAX_DATA_LEN },
+ [NL80211_MESH_SETUP_IE] =
+ NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
+ IEEE80211_MAX_DATA_LEN),
[NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG },
};
-static int nl80211_check_bool(const struct nlattr *nla, u8 min, u8 max, bool *out)
-{
- u8 val = nla_get_u8(nla);
- if (val < min || val > max)
- return -EINVAL;
- *out = val;
- return 0;
-}
-
-static int nl80211_check_u8(const struct nlattr *nla, u8 min, u8 max, u8 *out)
-{
- u8 val = nla_get_u8(nla);
- if (val < min || val > max)
- return -EINVAL;
- *out = val;
- return 0;
-}
-
-static int nl80211_check_u16(const struct nlattr *nla, u16 min, u16 max, u16 *out)
-{
- u16 val = nla_get_u16(nla);
- if (val < min || val > max)
- return -EINVAL;
- *out = val;
- return 0;
-}
-
-static int nl80211_check_u32(const struct nlattr *nla, u32 min, u32 max, u32 *out)
-{
- u32 val = nla_get_u32(nla);
- if (val < min || val > max)
- return -EINVAL;
- *out = val;
- return 0;
-}
-
-static int nl80211_check_s32(const struct nlattr *nla, s32 min, s32 max, s32 *out)
-{
- s32 val = nla_get_s32(nla);
- if (val < min || val > max)
- return -EINVAL;
- *out = val;
- return 0;
-}
-
-static int nl80211_check_power_mode(const struct nlattr *nla,
- enum nl80211_mesh_power_mode min,
- enum nl80211_mesh_power_mode max,
- enum nl80211_mesh_power_mode *out)
-{
- u32 val = nla_get_u32(nla);
- if (val < min || val > max)
- return -EINVAL;
- *out = val;
- return 0;
-}
-
static int nl80211_parse_mesh_config(struct genl_info *info,
struct mesh_config *cfg,
u32 *mask_out)
@@ -6239,13 +6203,12 @@ static int nl80211_parse_mesh_config(struct genl_info *info,
u32 mask = 0;
u16 ht_opmode;
-#define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \
-do { \
- if (tb[attr]) { \
- if (fn(tb[attr], min, max, &cfg->param)) \
- return -EINVAL; \
- mask |= (1 << (attr - 1)); \
- } \
+/*
+ * If attribute @attr is present in @tb, read it with @fn into cfg->param and
+ * record it in @mask. Note that the bit recorded is BIT(attr - 1), not
+ * BIT(attr); any later test of @mask has to use the same index.
+ */
+#define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, mask, attr, fn) \
+do { \
+ if (tb[attr]) { \
+ cfg->param = fn(tb[attr]); \
+ mask |= BIT((attr) - 1); \
+ } \
} while (0)
if (!info->attrs[NL80211_ATTR_MESH_CONFIG])
@@ -6260,75 +6223,73 @@ do { \
BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32);
/* Fill in the params struct */
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, 1, 255,
- mask, NL80211_MESHCONF_RETRY_TIMEOUT,
- nl80211_check_u16);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, 1, 255,
- mask, NL80211_MESHCONF_CONFIRM_TIMEOUT,
- nl80211_check_u16);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, 1, 255,
- mask, NL80211_MESHCONF_HOLDING_TIMEOUT,
- nl80211_check_u16);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, 0, 255,
- mask, NL80211_MESHCONF_MAX_PEER_LINKS,
- nl80211_check_u16);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, 0, 16,
- mask, NL80211_MESHCONF_MAX_RETRIES,
- nl80211_check_u8);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, 1, 255,
- mask, NL80211_MESHCONF_TTL, nl80211_check_u8);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, 1, 255,
- mask, NL80211_MESHCONF_ELEMENT_TTL,
- nl80211_check_u8);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, 0, 1,
- mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS,
- nl80211_check_bool);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, mask,
+ NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, mask,
+ NL80211_MESHCONF_CONFIRM_TIMEOUT,
+ nla_get_u16);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, mask,
+ NL80211_MESHCONF_HOLDING_TIMEOUT,
+ nla_get_u16);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, mask,
+ NL80211_MESHCONF_MAX_PEER_LINKS,
+ nla_get_u16);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, mask,
+ NL80211_MESHCONF_MAX_RETRIES, nla_get_u8);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, mask,
+ NL80211_MESHCONF_TTL, nla_get_u8);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, mask,
+ NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, mask,
+ NL80211_MESHCONF_AUTO_OPEN_PLINKS,
+ nla_get_u8);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor,
- 1, 255, mask,
+ mask,
NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR,
- nl80211_check_u32);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, 0, 255,
- mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
- nl80211_check_u8);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, 1, 65535,
- mask, NL80211_MESHCONF_PATH_REFRESH_TIME,
- nl80211_check_u32);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, 1, 65535,
- mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
- nl80211_check_u16);
+ nla_get_u32);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, mask,
+ NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
+ nla_get_u8);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, mask,
+ NL80211_MESHCONF_PATH_REFRESH_TIME,
+ nla_get_u32);
+	/*
+	 * FILL_IN_MESH_PARAM_IF_SET() records an attribute in mask as
+	 * BIT(attr - 1), so the range check has to test that same bit;
+	 * BIT(attr) belongs to the next attribute, which is not parsed yet.
+	 */
+	if (mask & BIT(NL80211_MESHCONF_PATH_REFRESH_TIME - 1) &&
+	    (cfg->path_refresh_time < 1 || cfg->path_refresh_time > 65535))
+		return -EINVAL;
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, mask,
+ NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT,
+ nla_get_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout,
- 1, 65535, mask,
+ mask,
NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT,
- nl80211_check_u32);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval,
- 1, 65535, mask,
+ nla_get_u32);
+	/*
+	 * FILL_IN_MESH_PARAM_IF_SET() records an attribute in mask as
+	 * BIT(attr - 1), so the range check has to test that same bit;
+	 * BIT(attr) belongs to the next attribute, which is not parsed yet.
+	 */
+	if (mask & BIT(NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT - 1) &&
+	    (cfg->dot11MeshHWMPactivePathTimeout < 1 ||
+	     cfg->dot11MeshHWMPactivePathTimeout > 65535))
+		return -EINVAL;
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, mask,
NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
- nl80211_check_u16);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval,
- 1, 65535, mask,
+ nla_get_u16);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval, mask,
NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
- nl80211_check_u16);
+ nla_get_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
- dot11MeshHWMPnetDiameterTraversalTime,
- 1, 65535, mask,
+ dot11MeshHWMPnetDiameterTraversalTime, mask,
NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
- nl80211_check_u16);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, 0, 4,
- mask, NL80211_MESHCONF_HWMP_ROOTMODE,
- nl80211_check_u8);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, 1, 65535,
- mask, NL80211_MESHCONF_HWMP_RANN_INTERVAL,
- nl80211_check_u16);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
- dot11MeshGateAnnouncementProtocol, 0, 1,
+ nla_get_u16);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, mask,
+ NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, mask,
+ NL80211_MESHCONF_HWMP_RANN_INTERVAL,
+ nla_get_u16);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshGateAnnouncementProtocol,
mask, NL80211_MESHCONF_GATE_ANNOUNCEMENTS,
- nl80211_check_bool);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1,
- mask, NL80211_MESHCONF_FORWARDING,
- nl80211_check_bool);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0,
- mask, NL80211_MESHCONF_RSSI_THRESHOLD,
- nl80211_check_s32);
+ nla_get_u8);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, mask,
+ NL80211_MESHCONF_FORWARDING, nla_get_u8);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, mask,
+ NL80211_MESHCONF_RSSI_THRESHOLD,
+ nla_get_s32);
/*
* Check HT operation mode based on
* IEEE 802.11-2016 9.4.2.57 HT Operation element.
@@ -6347,29 +6308,27 @@ do { \
cfg->ht_opmode = ht_opmode;
mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1));
}
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout,
- 1, 65535, mask,
- NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
- nl80211_check_u32);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, 1, 65535,
- mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL,
- nl80211_check_u16);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
- dot11MeshHWMPconfirmationInterval,
- 1, 65535, mask,
+ dot11MeshHWMPactivePathToRootTimeout, mask,
+ NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
+ nla_get_u32);
+	/*
+	 * FILL_IN_MESH_PARAM_IF_SET() records an attribute in mask as
+	 * BIT(attr - 1), so the range check has to test that same bit;
+	 * BIT(attr) belongs to the next attribute, which is not parsed yet.
+	 */
+	if (mask & BIT(NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT - 1) &&
+	    (cfg->dot11MeshHWMPactivePathToRootTimeout < 1 ||
+	     cfg->dot11MeshHWMPactivePathToRootTimeout > 65535))
+		return -EINVAL;
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, mask,
+ NL80211_MESHCONF_HWMP_ROOT_INTERVAL,
+ nla_get_u16);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPconfirmationInterval,
+ mask,
NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL,
- nl80211_check_u16);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode,
- NL80211_MESH_POWER_ACTIVE,
- NL80211_MESH_POWER_MAX,
- mask, NL80211_MESHCONF_POWER_MODE,
- nl80211_check_power_mode);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
- 0, 65535, mask,
- NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16);
- FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 0, 0xffffffff,
- mask, NL80211_MESHCONF_PLINK_TIMEOUT,
- nl80211_check_u32);
+ nla_get_u16);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode, mask,
+ NL80211_MESHCONF_POWER_MODE, nla_get_u32);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, mask,
+ NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16);
+ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, mask,
+ NL80211_MESHCONF_PLINK_TIMEOUT, nla_get_u32);
if (mask_out)
*mask_out = mask;
@@ -6412,8 +6371,6 @@ static int nl80211_parse_mesh_setup(struct genl_info *info,
if (tb[NL80211_MESH_SETUP_IE]) {
struct nlattr *ieattr =
tb[NL80211_MESH_SETUP_IE];
- if (!is_valid_ie_attr(ieattr))
- return -EINVAL;
setup->ie = nla_data(ieattr);
setup->ie_len = nla_len(ieattr);
}
@@ -7046,9 +7003,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
int err, tmp, n_ssids = 0, n_channels, i;
size_t ie_len;
- if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
- return -EINVAL;
-
wiphy = &rdev->wiphy;
if (wdev->iftype == NL80211_IFTYPE_NAN)
@@ -7402,9 +7356,6 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1];
s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF;
- if (!is_valid_ie_attr(attrs[NL80211_ATTR_IE]))
- return ERR_PTR(-EINVAL);
-
if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
n_channels = validate_scan_freqs(
attrs[NL80211_ATTR_SCAN_FREQUENCIES]);
@@ -7764,7 +7715,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
*/
if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) {
while (!sched_scan_req->reqid)
- sched_scan_req->reqid = rdev->wiphy.cookie_counter++;
+ sched_scan_req->reqid = cfg80211_assign_cookie(rdev);
}
err = rdev_sched_scan_start(rdev, dev, sched_scan_req);
@@ -7940,7 +7891,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
if (!need_new_beacon)
goto skip_beacons;
- err = nl80211_parse_beacon(info->attrs, &params.beacon_after);
+ err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_after);
if (err)
return err;
@@ -7950,7 +7901,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- err = nl80211_parse_beacon(csa_attrs, &params.beacon_csa);
+ err = nl80211_parse_beacon(rdev, csa_attrs, &params.beacon_csa);
if (err)
return err;
@@ -8187,7 +8138,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
int err;
rtnl_lock();
- err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
+ err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
if (err) {
rtnl_unlock();
return err;
@@ -8308,7 +8259,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
bool radio_stats;
rtnl_lock();
- res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
+ res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
if (res)
goto out_err;
@@ -8372,9 +8323,6 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
struct key_parse key;
bool local_state_change;
- if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
- return -EINVAL;
-
if (!info->attrs[NL80211_ATTR_MAC])
return -EINVAL;
@@ -8613,9 +8561,6 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
return -EPERM;
- if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
- return -EINVAL;
-
if (!info->attrs[NL80211_ATTR_MAC] ||
!info->attrs[NL80211_ATTR_SSID] ||
!info->attrs[NL80211_ATTR_WIPHY_FREQ])
@@ -8739,9 +8684,6 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
return -EPERM;
- if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
- return -EINVAL;
-
if (!info->attrs[NL80211_ATTR_MAC])
return -EINVAL;
@@ -8790,9 +8732,6 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
return -EPERM;
- if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
- return -EINVAL;
-
if (!info->attrs[NL80211_ATTR_MAC])
return -EINVAL;
@@ -8867,9 +8806,6 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
memset(&ibss, 0, sizeof(ibss));
- if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
- return -EINVAL;
-
if (!info->attrs[NL80211_ATTR_SSID] ||
!nla_len(info->attrs[NL80211_ATTR_SSID]))
return -EINVAL;
@@ -9307,9 +9243,6 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
memset(&connect, 0, sizeof(connect));
- if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
- return -EINVAL;
-
if (!info->attrs[NL80211_ATTR_SSID] ||
!nla_len(info->attrs[NL80211_ATTR_SSID]))
return -EINVAL;
@@ -9368,11 +9301,6 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
!wiphy_ext_feature_isset(&rdev->wiphy,
NL80211_EXT_FEATURE_MFP_OPTIONAL))
return -EOPNOTSUPP;
-
- if (connect.mfp != NL80211_MFP_REQUIRED &&
- connect.mfp != NL80211_MFP_NO &&
- connect.mfp != NL80211_MFP_OPTIONAL)
- return -EINVAL;
} else {
connect.mfp = NL80211_MFP_NO;
}
@@ -9545,8 +9473,6 @@ static int nl80211_update_connect_params(struct sk_buff *skb,
return -EOPNOTSUPP;
if (info->attrs[NL80211_ATTR_IE]) {
- if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
- return -EINVAL;
connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
changed |= UPDATE_ASSOC_IES;
@@ -10131,9 +10057,6 @@ static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info)
ps_state = nla_get_u32(info->attrs[NL80211_ATTR_PS_STATE]);
- if (ps_state != NL80211_PS_DISABLED && ps_state != NL80211_PS_ENABLED)
- return -EINVAL;
-
wdev = dev->ieee80211_ptr;
if (!rdev->ops->set_power_mgmt)
@@ -10696,8 +10619,7 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg,
if (!scan_plan)
return -ENOBUFS;
- if (!scan_plan ||
- nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL,
+ if (nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL,
req->scan_plans[i].interval) ||
(req->scan_plans[i].iterations &&
nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_ITERATIONS,
@@ -11295,9 +11217,6 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
if (tb[NL80211_ATTR_COALESCE_RULE_CONDITION])
new_rule->condition =
nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_CONDITION]);
- if (new_rule->condition != NL80211_COALESCE_CONDITION_MATCH &&
- new_rule->condition != NL80211_COALESCE_CONDITION_NO_MATCH)
- return -EINVAL;
if (!tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN])
return -EINVAL;
@@ -11650,8 +11569,6 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
conf.master_pref =
nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]);
- if (!conf.master_pref)
- return -EINVAL;
if (info->attrs[NL80211_ATTR_BANDS]) {
u32 bands = nla_get_u32(info->attrs[NL80211_ATTR_BANDS]);
@@ -11769,7 +11686,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
if (!func)
return -ENOMEM;
- func->cookie = wdev->wiphy->cookie_counter++;
+ func->cookie = cfg80211_assign_cookie(rdev);
if (!tb[NL80211_NAN_FUNC_TYPE] ||
nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]) > NL80211_NAN_FUNC_MAX_TYPE) {
@@ -12215,8 +12132,7 @@ static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
if (!info->attrs[NL80211_ATTR_MDID] ||
- !info->attrs[NL80211_ATTR_IE] ||
- !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
+ !info->attrs[NL80211_ATTR_IE])
return -EINVAL;
memset(&ft_params, 0, sizeof(ft_params));
@@ -12636,12 +12552,7 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]);
- if (tsid >= IEEE80211_NUM_TIDS)
- return -EINVAL;
-
up = nla_get_u8(info->attrs[NL80211_ATTR_USER_PRIO]);
- if (up >= IEEE80211_NUM_UPS)
- return -EINVAL;
/* WMM uses TIDs 0-7 even for TSPEC */
if (tsid >= IEEE80211_FIRST_TSPEC_TSID) {
@@ -12999,6 +12910,76 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
return err;
}
+/*
+ * NL80211_CMD_GET_FTM_RESPONDER_STATS handler: fetch the FTM responder
+ * statistics from the driver and reply to the requester with them nested
+ * inside NL80211_ATTR_FTM_RESPONDER_STATS.
+ *
+ * Returns -EOPNOTSUPP unless the interface is an AP with a beacon interval
+ * set, the error from rdev_get_ftm_responder_stats() if that fails, -ENODATA
+ * when the driver filled in no counters, -ENOMEM/-ENOBUFS when the reply
+ * cannot be built, and otherwise the result of genlmsg_reply().
+ */
+static int nl80211_get_ftm_responder_stats(struct sk_buff *skb,
+					   struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_ftm_responder_stats ftm_stats = {};
+	struct sk_buff *msg;
+	void *hdr;
+	struct nlattr *ftm_stats_attr;
+	int err;
+
+	if (wdev->iftype != NL80211_IFTYPE_AP || !wdev->beacon_interval)
+		return -EOPNOTSUPP;
+
+	err = rdev_get_ftm_responder_stats(rdev, dev, &ftm_stats);
+	if (err)
+		return err;
+
+	if (!ftm_stats.filled)
+		return -ENODATA;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
+			     NL80211_CMD_GET_FTM_RESPONDER_STATS);
+	/* msg is already allocated: free it through the common error path */
+	if (!hdr)
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
+		goto nla_put_failure;
+
+	ftm_stats_attr = nla_nest_start(msg, NL80211_ATTR_FTM_RESPONDER_STATS);
+	if (!ftm_stats_attr)
+		goto nla_put_failure;
+
+/* Emit a counter only if the driver flagged it in ftm_stats.filled */
+#define SET_FTM(field, name, type) \
+	do { if ((ftm_stats.filled & BIT(NL80211_FTM_STATS_ ## name)) && \
+	    nla_put_ ## type(msg, NL80211_FTM_STATS_ ## name, \
+			     ftm_stats.field)) \
+		goto nla_put_failure; } while (0)
+#define SET_FTM_U64(field, name) \
+	do { if ((ftm_stats.filled & BIT(NL80211_FTM_STATS_ ## name)) && \
+	    nla_put_u64_64bit(msg, NL80211_FTM_STATS_ ## name, \
+			      ftm_stats.field, NL80211_FTM_STATS_PAD)) \
+		goto nla_put_failure; } while (0)
+
+	SET_FTM(success_num, SUCCESS_NUM, u32);
+	SET_FTM(partial_num, PARTIAL_NUM, u32);
+	SET_FTM(failed_num, FAILED_NUM, u32);
+	SET_FTM(asap_num, ASAP_NUM, u32);
+	SET_FTM(non_asap_num, NON_ASAP_NUM, u32);
+	SET_FTM_U64(total_duration_ms, TOTAL_DURATION_MSEC);
+	SET_FTM(unknown_triggers_num, UNKNOWN_TRIGGERS_NUM, u32);
+	SET_FTM(reschedule_requests_num, RESCHEDULE_REQUESTS_NUM, u32);
+	SET_FTM(out_of_window_triggers_num, OUT_OF_WINDOW_TRIGGERS_NUM, u32);
+#undef SET_FTM
+#undef SET_FTM_U64
+
+	nla_nest_end(msg, ftm_stats_attr);
+
+	genlmsg_end(msg, hdr);
+	return genlmsg_reply(msg, info);
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -13910,6 +13891,13 @@ static const struct genl_ops nl80211_ops[] = {
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS,
+ .doit = nl80211_get_ftm_responder_stats,
+ .policy = nl80211_policy,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV |
+ NL80211_FLAG_NEED_RTNL,
+ },
};
static struct genl_family nl80211_fam __ro_after_init = {
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 364f5d67f05b..51380b5c32f2 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1232,4 +1232,19 @@ rdev_external_auth(struct cfg80211_registered_device *rdev,
return ret;
}
+/*
+ * Invoke the driver's get_ftm_responder_stats op, tracing both the call and
+ * the status that is handed back.
+ *
+ * Returns the op's result, or -EOPNOTSUPP when the driver does not
+ * implement the op.
+ */
+static inline int
+rdev_get_ftm_responder_stats(struct cfg80211_registered_device *rdev,
+			     struct net_device *dev,
+			     struct cfg80211_ftm_responder_stats *ftm_stats)
+{
+	int status;
+
+	trace_rdev_get_ftm_responder_stats(&rdev->wiphy, dev, ftm_stats);
+
+	if (!rdev->ops->get_ftm_responder_stats)
+		status = -EOPNOTSUPP;
+	else
+		status = rdev->ops->get_ftm_responder_stats(&rdev->wiphy, dev,
+							    ftm_stats);
+
+	trace_rdev_return_int(&rdev->wiphy, status);
+	return status;
+}
+
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 24cfa2776f50..ecfb1a06dbb2 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -847,22 +847,36 @@ static bool valid_regdb(const u8 *data, unsigned int size)
return true;
}
-static void set_wmm_rule(struct ieee80211_reg_rule *rrule,
- struct fwdb_wmm_rule *wmm)
-{
- struct ieee80211_wmm_rule *rule = &rrule->wmm_rule;
- unsigned int i;
+static void set_wmm_rule(const struct fwdb_header *db,
+ const struct fwdb_country *country,
+ const struct fwdb_rule *rule,
+ struct ieee80211_reg_rule *rrule)
+{
+ struct ieee80211_wmm_rule *wmm_rule = &rrule->wmm_rule;
+ struct fwdb_wmm_rule *wmm;
+ unsigned int i, wmm_ptr;
+
+ wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2;
+ wmm = (void *)((u8 *)db + wmm_ptr);
+
+ if (!valid_wmm(wmm)) {
+ pr_err("Invalid regulatory WMM rule %u-%u in domain %c%c\n",
+ be32_to_cpu(rule->start), be32_to_cpu(rule->end),
+ country->alpha2[0], country->alpha2[1]);
+ return;
+ }
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
- rule->client[i].cw_min =
+ wmm_rule->client[i].cw_min =
ecw2cw((wmm->client[i].ecw & 0xf0) >> 4);
- rule->client[i].cw_max = ecw2cw(wmm->client[i].ecw & 0x0f);
- rule->client[i].aifsn = wmm->client[i].aifsn;
- rule->client[i].cot = 1000 * be16_to_cpu(wmm->client[i].cot);
- rule->ap[i].cw_min = ecw2cw((wmm->ap[i].ecw & 0xf0) >> 4);
- rule->ap[i].cw_max = ecw2cw(wmm->ap[i].ecw & 0x0f);
- rule->ap[i].aifsn = wmm->ap[i].aifsn;
- rule->ap[i].cot = 1000 * be16_to_cpu(wmm->ap[i].cot);
+ wmm_rule->client[i].cw_max = ecw2cw(wmm->client[i].ecw & 0x0f);
+ wmm_rule->client[i].aifsn = wmm->client[i].aifsn;
+ wmm_rule->client[i].cot =
+ 1000 * be16_to_cpu(wmm->client[i].cot);
+ wmm_rule->ap[i].cw_min = ecw2cw((wmm->ap[i].ecw & 0xf0) >> 4);
+ wmm_rule->ap[i].cw_max = ecw2cw(wmm->ap[i].ecw & 0x0f);
+ wmm_rule->ap[i].aifsn = wmm->ap[i].aifsn;
+ wmm_rule->ap[i].cot = 1000 * be16_to_cpu(wmm->ap[i].cot);
}
rrule->has_wmm = true;
@@ -870,7 +884,7 @@ static void set_wmm_rule(struct ieee80211_reg_rule *rrule,
static int __regdb_query_wmm(const struct fwdb_header *db,
const struct fwdb_country *country, int freq,
- struct ieee80211_reg_rule *rule)
+ struct ieee80211_reg_rule *rrule)
{
unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
@@ -879,18 +893,14 @@ static int __regdb_query_wmm(const struct fwdb_header *db,
for (i = 0; i < coll->n_rules; i++) {
__be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2;
- struct fwdb_rule *rrule = (void *)((u8 *)db + rule_ptr);
- struct fwdb_wmm_rule *wmm;
- unsigned int wmm_ptr;
+ struct fwdb_rule *rule = (void *)((u8 *)db + rule_ptr);
- if (rrule->len < offsetofend(struct fwdb_rule, wmm_ptr))
+ if (rule->len < offsetofend(struct fwdb_rule, wmm_ptr))
continue;
- if (freq >= KHZ_TO_MHZ(be32_to_cpu(rrule->start)) &&
- freq <= KHZ_TO_MHZ(be32_to_cpu(rrule->end))) {
- wmm_ptr = be16_to_cpu(rrule->wmm_ptr) << 2;
- wmm = (void *)((u8 *)db + wmm_ptr);
- set_wmm_rule(rule, wmm);
+ if (freq >= KHZ_TO_MHZ(be32_to_cpu(rule->start)) &&
+ freq <= KHZ_TO_MHZ(be32_to_cpu(rule->end))) {
+ set_wmm_rule(db, country, rule, rrule);
return 0;
}
}
@@ -972,12 +982,8 @@ static int regdb_query_country(const struct fwdb_header *db,
if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout))
rrule->dfs_cac_ms =
1000 * be16_to_cpu(rule->cac_timeout);
- if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr)) {
- u32 wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2;
- struct fwdb_wmm_rule *wmm = (void *)((u8 *)db + wmm_ptr);
-
- set_wmm_rule(rrule, wmm);
- }
+ if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr))
+ set_wmm_rule(db, country, rule, rrule);
}
return reg_schedule_apply(regdom);
@@ -3186,13 +3192,59 @@ static void restore_regulatory_settings(bool reset_user)
schedule_work(&reg_work);
}
+static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag) /* true unless some wdev's wiphy lacks @flag */
+{
+ struct cfg80211_registered_device *rdev;
+ struct wireless_dev *wdev;
+
+ list_for_each_entry(rdev, &cfg80211_rdev_list, list) { /* every registered device ... */
+ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { /* ... and every wdev on it */
+ wdev_lock(wdev);
+ if (!(wdev->wiphy->regulatory_flags & flag)) { /* flag bit(s) missing on this wdev's wiphy */
+ wdev_unlock(wdev); /* drop the lock before the early exit */
+ return false;
+ }
+ wdev_unlock(wdev);
+ }
+ }
+
+ return true; /* also reached when no wdev was visited at all */
+}
+
void regulatory_hint_disconnect(void)
{
+ /* Restore of regulatory settings is not required when wiphy(s)
+ * ignore IE from connected access point but clearance of beacon hints
+ * is required when wiphy(s) supports beacon hints.
+ */
+ if (is_wiphy_all_set_reg_flag(REGULATORY_COUNTRY_IE_IGNORE)) {
+ struct reg_beacon *reg_beacon, *btmp;
+
+ if (is_wiphy_all_set_reg_flag(REGULATORY_DISABLE_BEACON_HINTS))
+ return;
+
+ spin_lock_bh(&reg_pending_beacons_lock);
+ list_for_each_entry_safe(reg_beacon, btmp,
+ &reg_pending_beacons, list) {
+ list_del(&reg_beacon->list);
+ kfree(reg_beacon);
+ }
+ spin_unlock_bh(&reg_pending_beacons_lock);
+
+ list_for_each_entry_safe(reg_beacon, btmp,
+ &reg_beacon_list, list) {
+ list_del(&reg_beacon->list);
+ kfree(reg_beacon);
+ }
+
+ return;
+ }
+
pr_debug("All devices are disconnected, going to restore regulatory settings\n");
restore_regulatory_settings(false);
}
-static bool freq_is_chan_12_13_14(u16 freq)
+static bool freq_is_chan_12_13_14(u32 freq)
{
if (freq == ieee80211_channel_to_frequency(12, NL80211_BAND_2GHZ) ||
freq == ieee80211_channel_to_frequency(13, NL80211_BAND_2GHZ) ||
@@ -3779,6 +3831,15 @@ static int __init regulatory_init_db(void)
{
int err;
+ /*
+ * It's possible that - due to other bugs/issues - cfg80211
+ * never called regulatory_init() below, or that it failed;
+ * in that case, don't try to do any further work here as
+ * it's doomed to lead to crashes.
+ */
+ if (IS_ERR_OR_NULL(reg_pdev))
+ return -EINVAL;
+
err = load_builtin_regdb_keys();
if (err)
return err;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 7c73510b161f..c6a9446b4e6b 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -112,7 +112,7 @@
} while (0)
#define CHAN_ENTRY __field(enum nl80211_band, band) \
- __field(u16, center_freq)
+ __field(u32, center_freq)
#define CHAN_ASSIGN(chan) \
do { \
if (chan) { \
@@ -2368,6 +2368,140 @@ TRACE_EVENT(rdev_external_auth,
__entry->bssid, __entry->ssid, __entry->status)
);
+TRACE_EVENT(rdev_start_radar_detection, /* records wiphy, netdev, chandef and the CAC time given to the event */
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_chan_def *chandef,
+ u32 cac_time_ms),
+ TP_ARGS(wiphy, netdev, chandef, cac_time_ms),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ CHAN_DEF_ENTRY
+ __field(u32, cac_time_ms)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ CHAN_DEF_ASSIGN(chandef);
+ __entry->cac_time_ms = cac_time_ms; /* stored verbatim, printed below as cac_time_ms=%u */
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
+ ", cac_time_ms=%u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
+ __entry->cac_time_ms)
+);
+
+TRACE_EVENT(rdev_set_mcast_rate, /* records the per-band multicast rate array given to the event */
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ int *mcast_rate),
+ TP_ARGS(wiphy, netdev, mcast_rate),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __array(int, mcast_rate, NUM_NL80211_BANDS)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ memcpy(__entry->mcast_rate, mcast_rate, /* reads NUM_NL80211_BANDS ints from the caller's array */
+ sizeof(int) * NUM_NL80211_BANDS);
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", "
+ "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 60GHz=0x%x]",
+ WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->mcast_rate[NL80211_BAND_2GHZ], /* only these three band slots are printed, */
+ __entry->mcast_rate[NL80211_BAND_5GHZ], /* although every slot is recorded above */
+ __entry->mcast_rate[NL80211_BAND_60GHZ])
+);
+
+TRACE_EVENT(rdev_set_coalesce, /* records how many coalesce rules were given to the event */
+ TP_PROTO(struct wiphy *wiphy, struct cfg80211_coalesce *coalesce),
+ TP_ARGS(wiphy, coalesce),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ __field(int, n_rules)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ __entry->n_rules = coalesce ? coalesce->n_rules : 0; /* a NULL coalesce is logged as 0 rules */
+ ),
+ TP_printk(WIPHY_PR_FMT ", n_rules=%d",
+ WIPHY_PR_ARG, __entry->n_rules)
+);
+
+DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan, /* instance of the wiphy_wdev_evt event class; no extra fields */
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+ TP_ARGS(wiphy, wdev)
+);
+
+TRACE_EVENT(rdev_set_multicast_to_unicast, /* records the enabled flag given to the event */
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ const bool enabled),
+ TP_ARGS(wiphy, netdev, enabled),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(bool, enabled)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->enabled = enabled;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s",
+ WIPHY_PR_ARG, NETDEV_PR_ARG,
+ BOOL_TO_STR(__entry->enabled)) /* the flag is printed as a string via BOOL_TO_STR */
+);
+
+DEFINE_EVENT(wiphy_wdev_evt, rdev_get_txq_stats, /* instance of the wiphy_wdev_evt event class; no extra fields */
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+ TP_ARGS(wiphy, wdev)
+);
+
+TRACE_EVENT(rdev_get_ftm_responder_stats, /* records the FTM responder counters found in @ftm_stats */
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_ftm_responder_stats *ftm_stats),
+
+ TP_ARGS(wiphy, netdev, ftm_stats),
+
+ TP_STRUCT__entry( /* only fields that TP_fast_assign fills; an unassigned field would log stale buffer bytes */
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(u32, success_num)
+ __field(u32, partial_num)
+ __field(u32, failed_num)
+ __field(u32, asap_num)
+ __field(u32, non_asap_num)
+ __field(u64, duration)
+ __field(u32, unknown_triggers)
+ __field(u32, reschedule)
+ __field(u32, out_of_window)
+ ),
+
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->success_num = ftm_stats->success_num;
+ __entry->partial_num = ftm_stats->partial_num;
+ __entry->failed_num = ftm_stats->failed_num;
+ __entry->asap_num = ftm_stats->asap_num;
+ __entry->non_asap_num = ftm_stats->non_asap_num;
+ __entry->duration = ftm_stats->total_duration_ms; /* 64-bit, printed with %llu */
+ __entry->unknown_triggers = ftm_stats->unknown_triggers_num;
+ __entry->reschedule = ftm_stats->reschedule_requests_num;
+ __entry->out_of_window = ftm_stats->out_of_window_triggers_num;
+ ),
+
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", Ftm responder stats: "
+ "success %u, partial %u, failed %u, asap %u, non asap %u, "
+ "total duration %llu, unknown triggers %u, rescheduled %u, "
+ "out of window %u", WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->success_num, __entry->partial_num, __entry->failed_num,
+ __entry->asap_num, __entry->non_asap_num, __entry->duration,
+ __entry->unknown_triggers, __entry->reschedule,
+ __entry->out_of_window)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
@@ -3160,105 +3294,6 @@ TRACE_EVENT(cfg80211_stop_iface,
TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT,
WIPHY_PR_ARG, WDEV_PR_ARG)
);
-
-TRACE_EVENT(rdev_start_radar_detection,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_chan_def *chandef,
- u32 cac_time_ms),
- TP_ARGS(wiphy, netdev, chandef, cac_time_ms),
- TP_STRUCT__entry(
- WIPHY_ENTRY
- NETDEV_ENTRY
- CHAN_DEF_ENTRY
- __field(u32, cac_time_ms)
- ),
- TP_fast_assign(
- WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- CHAN_DEF_ASSIGN(chandef);
- __entry->cac_time_ms = cac_time_ms;
- ),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
- ", cac_time_ms=%u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
- __entry->cac_time_ms)
-);
-
-TRACE_EVENT(rdev_set_mcast_rate,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- int *mcast_rate),
- TP_ARGS(wiphy, netdev, mcast_rate),
- TP_STRUCT__entry(
- WIPHY_ENTRY
- NETDEV_ENTRY
- __array(int, mcast_rate, NUM_NL80211_BANDS)
- ),
- TP_fast_assign(
- WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- memcpy(__entry->mcast_rate, mcast_rate,
- sizeof(int) * NUM_NL80211_BANDS);
- ),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", "
- "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 60GHz=0x%x]",
- WIPHY_PR_ARG, NETDEV_PR_ARG,
- __entry->mcast_rate[NL80211_BAND_2GHZ],
- __entry->mcast_rate[NL80211_BAND_5GHZ],
- __entry->mcast_rate[NL80211_BAND_60GHZ])
-);
-
-TRACE_EVENT(rdev_set_coalesce,
- TP_PROTO(struct wiphy *wiphy, struct cfg80211_coalesce *coalesce),
- TP_ARGS(wiphy, coalesce),
- TP_STRUCT__entry(
- WIPHY_ENTRY
- __field(int, n_rules)
- ),
- TP_fast_assign(
- WIPHY_ASSIGN;
- __entry->n_rules = coalesce ? coalesce->n_rules : 0;
- ),
- TP_printk(WIPHY_PR_FMT ", n_rules=%d",
- WIPHY_PR_ARG, __entry->n_rules)
-);
-
-DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan,
- TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
- TP_ARGS(wiphy, wdev)
-);
-
-TRACE_EVENT(rdev_set_multicast_to_unicast,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- const bool enabled),
- TP_ARGS(wiphy, netdev, enabled),
- TP_STRUCT__entry(
- WIPHY_ENTRY
- NETDEV_ENTRY
- __field(bool, enabled)
- ),
- TP_fast_assign(
- WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- __entry->enabled = enabled;
- ),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s",
- WIPHY_PR_ARG, NETDEV_PR_ARG,
- BOOL_TO_STR(__entry->enabled))
-);
-
-TRACE_EVENT(rdev_get_txq_stats,
- TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
- TP_ARGS(wiphy, wdev),
- TP_STRUCT__entry(
- WIPHY_ENTRY
- WDEV_ENTRY
- ),
- TP_fast_assign(
- WIPHY_ASSIGN;
- WDEV_ASSIGN;
- ),
- TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG)
-);
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 959ed3acd240..ef14d80ca03e 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -5,17 +5,20 @@
* Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
*/
#include <linux/export.h>
#include <linux/bitops.h>
#include <linux/etherdevice.h>
#include <linux/slab.h>
+#include <linux/ieee80211.h>
#include <net/cfg80211.h>
#include <net/ip.h>
#include <net/dsfield.h>
#include <linux/if_vlan.h>
#include <linux/mpls.h>
#include <linux/gcd.h>
+#include <linux/bitfield.h>
#include "core.h"
#include "rdev-ops.h"
@@ -88,7 +91,7 @@ int ieee80211_channel_to_frequency(int chan, enum nl80211_band band)
return 5000 + chan * 5;
break;
case NL80211_BAND_60GHZ:
- if (chan < 5)
+ if (chan < 7)
return 56160 + chan * 2160;
break;
default:
@@ -109,7 +112,7 @@ int ieee80211_frequency_to_channel(int freq)
return (freq - 4000) / 5;
else if (freq <= 45000) /* DMG band lower limit */
return (freq - 5000) / 5;
- else if (freq >= 58320 && freq <= 64800)
+ else if (freq >= 58320 && freq <= 70200)
return (freq - 56160) / 2160;
else
return 0;
@@ -1568,7 +1571,7 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
}
/* 56.16 GHz, channel 1..4 */
- if (freq >= 56160 + 2160 * 1 && freq <= 56160 + 2160 * 4) {
+ if (freq >= 56160 + 2160 * 1 && freq <= 56160 + 2160 * 6) {
if (chandef->width >= NL80211_CHAN_WIDTH_40)
return false;
@@ -1893,3 +1896,154 @@ EXPORT_SYMBOL(rfc1042_header);
const unsigned char bridge_tunnel_header[] __aligned(2) =
{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
EXPORT_SYMBOL(bridge_tunnel_header);
+
+/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
+struct iapp_layer2_update { /* on-wire layout of the frame built by cfg80211_send_layer2_update() */
+ u8 da[ETH_ALEN]; /* broadcast */
+ u8 sa[ETH_ALEN]; /* STA addr */
+ __be16 len; /* 6: byte count of the four fields below */
+ u8 dsap; /* 0 */
+ u8 ssap; /* 0x01 as sent by cfg80211_send_layer2_update() */
+ u8 control; /* 0xaf as sent by cfg80211_send_layer2_update() */
+ u8 xid_info[3]; /* XID format identifier, LLC types/classes, receive window */
+} __packed;
+
+void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr) /* inject a broadcast XID frame with source @addr on @dev */
+{
+ struct iapp_layer2_update *msg;
+ struct sk_buff *skb;
+
+ /* Send Layer 2 Update Frame to update forwarding tables in layer 2
+ * bridge devices */
+
+ skb = dev_alloc_skb(sizeof(*msg));
+ if (!skb)
+ return; /* best effort: allocation failure is not reported to the caller */
+ msg = skb_put(skb, sizeof(*msg));
+
+ /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
+ * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
+
+ eth_broadcast_addr(msg->da);
+ ether_addr_copy(msg->sa, addr);
+ msg->len = htons(6); /* dsap + ssap + control + 3 xid_info bytes */
+ msg->dsap = 0;
+ msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */
+ msg->control = 0xaf; /* XID response lsb.1111F101.
+ * F=0 (no poll command; unsolicited frame) */
+ msg->xid_info[0] = 0x81; /* XID format identifier */
+ msg->xid_info[1] = 1; /* LLC types/classes: Type 1 LLC */
+ msg->xid_info[2] = 0; /* XID sender's receive window size (RW) */
+
+ skb->dev = dev;
+ skb->protocol = eth_type_trans(skb, dev);
+ memset(skb->cb, 0, sizeof(skb->cb)); /* clear the control block before handing the skb on */
+ netif_rx_ni(skb); /* the frame is fed into the receive path of @dev */
+}
+EXPORT_SYMBOL(cfg80211_send_layer2_update);
+
+int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, /* VHT capabilities to evaluate */
+ enum ieee80211_vht_chanwidth bw, /* channel width the answer is wanted for */
+ int mcs, bool ext_nss_bw_capable) /* mcs: 0..9; returns max NSS, 0 if unsupported or invalid */
+{
+ u16 map = le16_to_cpu(cap->supp_mcs.rx_mcs_map);
+ int max_vht_nss = 0;
+ int ext_nss_bw;
+ int supp_width;
+ int i, mcs_encoding;
+
+ if (map == 0xffff) /* every 2-bit entry is 3: no stream count is supported */
+ return 0;
+
+ if (WARN_ON(mcs > 9))
+ return 0;
+ if (mcs <= 7) /* translate the MCS index into the 2-bit map encoding */
+ mcs_encoding = 0;
+ else if (mcs == 8)
+ mcs_encoding = 1;
+ else
+ mcs_encoding = 2;
+
+ /* find max_vht_nss for the given MCS */
+ for (i = 7; i >= 0; i--) {
+ int supp = (map >> (2 * i)) & 3;
+
+ if (supp == 3) /* 3 means this stream count is not supported */
+ continue;
+
+ if (supp >= mcs_encoding) {
+ max_vht_nss = i + 1; /* entry i describes i + 1 spatial streams */
+ break;
+ }
+ }
+
+ if (!(cap->supp_mcs.tx_mcs_map &
+ cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE)))
+ return max_vht_nss;
+
+ ext_nss_bw = le32_get_bits(cap->vht_cap_info,
+ IEEE80211_VHT_CAP_EXT_NSS_BW_MASK);
+ supp_width = le32_get_bits(cap->vht_cap_info,
+ IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK);
+
+ /* if not capable, treat ext_nss_bw as 0 */
+ if (!ext_nss_bw_capable)
+ ext_nss_bw = 0;
+
+ /* This is invalid */
+ if (supp_width == 3)
+ return 0;
+
+ /* This is an invalid combination so pretend nothing is supported */
+ if (supp_width == 2 && (ext_nss_bw == 1 || ext_nss_bw == 2))
+ return 0;
+
+ /*
+ * Cover all the special cases according to IEEE 802.11-2016
+ * Table 9-250. All other cases are either factor of 1 or not
+ * valid/supported. Fractional stream counts are rounded down.
+ */
+ switch (bw) {
+ case IEEE80211_VHT_CHANWIDTH_USE_HT:
+ case IEEE80211_VHT_CHANWIDTH_80MHZ:
+ if ((supp_width == 1 || supp_width == 2) &&
+ ext_nss_bw == 3)
+ return 2 * max_vht_nss;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_160MHZ:
+ if (supp_width == 0 &&
+ (ext_nss_bw == 1 || ext_nss_bw == 2))
+ return max_vht_nss / 2;
+ if (supp_width == 0 &&
+ ext_nss_bw == 3)
+ return (3 * max_vht_nss) / 4;
+ if (supp_width == 1 &&
+ ext_nss_bw == 3)
+ return 2 * max_vht_nss;
+ break;
+ case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
+ if (supp_width == 0 &&
+ ext_nss_bw == 1) /* ext_nss_bw == 2 is valid here and handled next */
+ return 0; /* not possible */
+ if (supp_width == 0 &&
+ ext_nss_bw == 2)
+ return max_vht_nss / 2;
+ if (supp_width == 0 &&
+ ext_nss_bw == 3)
+ return (3 * max_vht_nss) / 4;
+ if (supp_width == 1 &&
+ ext_nss_bw == 0)
+ return 0; /* not possible */
+ if (supp_width == 1 &&
+ ext_nss_bw == 1)
+ return max_vht_nss / 2;
+ if (supp_width == 1 &&
+ ext_nss_bw == 2)
+ return (3 * max_vht_nss) / 4;
+ break;
+ }
+
+ /* not covered or invalid combination received */
+ return max_vht_nss;
+}
+EXPORT_SYMBOL(ieee80211_get_vht_max_nss);
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index bfe2dbea480b..a264cf2accd0 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -32,37 +32,49 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
unsigned long flags;
- if (xs->dev) {
- spin_lock_irqsave(&umem->xsk_list_lock, flags);
- list_del_rcu(&xs->list);
- spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
-
- if (umem->zc)
- synchronize_net();
- }
+ spin_lock_irqsave(&umem->xsk_list_lock, flags);
+ list_del_rcu(&xs->list);
+ spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}
-int xdp_umem_query(struct net_device *dev, u16 queue_id)
+/* The umem is stored both in the _rx struct and the _tx struct as we do
+ * not know if the device has more tx queues than rx, or the opposite.
+ * This might also change during run time.
+ */
+static void xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
+ u16 queue_id)
{
- struct netdev_bpf bpf;
+ if (queue_id < dev->real_num_rx_queues)
+ dev->_rx[queue_id].umem = umem;
+ if (queue_id < dev->real_num_tx_queues)
+ dev->_tx[queue_id].umem = umem;
+}
- ASSERT_RTNL();
+struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
+ u16 queue_id)
+{
+ if (queue_id < dev->real_num_rx_queues)
+ return dev->_rx[queue_id].umem;
+ if (queue_id < dev->real_num_tx_queues)
+ return dev->_tx[queue_id].umem;
- memset(&bpf, 0, sizeof(bpf));
- bpf.command = XDP_QUERY_XSK_UMEM;
- bpf.xsk.queue_id = queue_id;
+ return NULL;
+}
- if (!dev->netdev_ops->ndo_bpf)
- return 0;
- return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
+static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
+{
+ if (queue_id < dev->real_num_rx_queues)
+ dev->_rx[queue_id].umem = NULL;
+ if (queue_id < dev->real_num_tx_queues)
+ dev->_tx[queue_id].umem = NULL;
}
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
- u32 queue_id, u16 flags)
+ u16 queue_id, u16 flags)
{
bool force_zc, force_copy;
struct netdev_bpf bpf;
- int err;
+ int err = 0;
force_zc = flags & XDP_ZEROCOPY;
force_copy = flags & XDP_COPY;
@@ -70,19 +82,23 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
if (force_zc && force_copy)
return -EINVAL;
- if (force_copy)
- return 0;
-
- if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
- return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */
+ rtnl_lock();
+ if (xdp_get_umem_from_qid(dev, queue_id)) {
+ err = -EBUSY;
+ goto out_rtnl_unlock;
+ }
- bpf.command = XDP_QUERY_XSK_UMEM;
+ xdp_reg_umem_at_qid(dev, umem, queue_id);
+ umem->dev = dev;
+ umem->queue_id = queue_id;
+ if (force_copy)
+ /* For copy-mode, we are done. */
+ goto out_rtnl_unlock;
- rtnl_lock();
- err = xdp_umem_query(dev, queue_id);
- if (err) {
- err = err < 0 ? -EOPNOTSUPP : -EBUSY;
- goto err_rtnl_unlock;
+ if (!dev->netdev_ops->ndo_bpf ||
+ !dev->netdev_ops->ndo_xsk_async_xmit) {
+ err = -EOPNOTSUPP;
+ goto err_unreg_umem;
}
bpf.command = XDP_SETUP_XSK_UMEM;
@@ -91,18 +107,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
err = dev->netdev_ops->ndo_bpf(dev, &bpf);
if (err)
- goto err_rtnl_unlock;
+ goto err_unreg_umem;
rtnl_unlock();
dev_hold(dev);
- umem->dev = dev;
- umem->queue_id = queue_id;
umem->zc = true;
return 0;
-err_rtnl_unlock:
+err_unreg_umem:
+ xdp_clear_umem_at_qid(dev, queue_id);
+ if (!force_zc)
+ err = 0; /* fallback to copy mode */
+out_rtnl_unlock:
rtnl_unlock();
- return force_zc ? err : 0; /* fail or fallback */
+ return err;
}
static void xdp_umem_clear_dev(struct xdp_umem *umem)
@@ -110,7 +128,7 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem)
struct netdev_bpf bpf;
int err;
- if (umem->dev) {
+ if (umem->zc) {
bpf.command = XDP_SETUP_XSK_UMEM;
bpf.xsk.umem = NULL;
bpf.xsk.queue_id = umem->queue_id;
@@ -121,9 +139,17 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem)
if (err)
WARN(1, "failed to disable umem!\n");
+ }
+
+ if (umem->dev) {
+ rtnl_lock();
+ xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
+ rtnl_unlock();
+ }
+ if (umem->zc) {
dev_put(umem->dev);
- umem->dev = NULL;
+ umem->zc = false;
}
}
@@ -167,6 +193,8 @@ static void xdp_umem_release(struct xdp_umem *umem)
umem->cq = NULL;
}
+ xsk_reuseq_destroy(umem);
+
xdp_umem_unpin_pages(umem);
task = get_pid_task(umem->pid, PIDTYPE_PID);
@@ -314,8 +342,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pid = get_task_pid(current, PIDTYPE_PID);
umem->address = (unsigned long)addr;
- umem->props.chunk_mask = ~((u64)chunk_size - 1);
- umem->props.size = size;
+ umem->chunk_mask = ~((u64)chunk_size - 1);
+ umem->size = size;
umem->headroom = headroom;
umem->chunk_size_nohr = chunk_size - headroom;
umem->npgs = size / PAGE_SIZE;
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index f11560334f88..27603227601b 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -8,18 +8,8 @@
#include <net/xdp_sock.h>
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
-{
- return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
-}
-
-static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
-{
- return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
-}
-
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
- u32 queue_id, u16 flags);
+ u16 queue_id, u16 flags);
bool xdp_umem_validate_queues(struct xdp_umem *umem);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h
deleted file mode 100644
index 40eab10dfc49..000000000000
--- a/net/xdp/xdp_umem_props.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* XDP user-space packet buffer
- * Copyright(c) 2018 Intel Corporation.
- */
-
-#ifndef XDP_UMEM_PROPS_H_
-#define XDP_UMEM_PROPS_H_
-
-struct xdp_umem_props {
- u64 chunk_mask;
- u64 size;
-};
-
-#endif /* XDP_UMEM_PROPS_H_ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 4e937cd7c17d..0577cd49aa72 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -55,20 +55,30 @@ EXPORT_SYMBOL(xsk_umem_discard_addr);
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- void *buffer;
+ void *to_buf, *from_buf;
+ u32 metalen;
u64 addr;
int err;
if (!xskq_peek_addr(xs->umem->fq, &addr) ||
- len > xs->umem->chunk_size_nohr) {
+ len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
xs->rx_dropped++;
return -ENOSPC;
}
addr += xs->umem->headroom;
- buffer = xdp_umem_get_data(xs->umem, addr);
- memcpy(buffer, xdp->data, len);
+ if (unlikely(xdp_data_meta_unsupported(xdp))) {
+ from_buf = xdp->data;
+ metalen = 0;
+ } else {
+ from_buf = xdp->data_meta;
+ metalen = xdp->data - xdp->data_meta;
+ }
+
+ to_buf = xdp_umem_get_data(xs->umem, addr);
+ memcpy(to_buf, from_buf, len + metalen);
+ addr += metalen;
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (!err) {
xskq_discard_addr(xs->umem->fq);
@@ -111,6 +121,7 @@ void xsk_flush(struct xdp_sock *xs)
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
+ u32 metalen = xdp->data - xdp->data_meta;
u32 len = xdp->data_end - xdp->data;
void *buffer;
u64 addr;
@@ -120,7 +131,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
return -EINVAL;
if (!xskq_peek_addr(xs->umem->fq, &addr) ||
- len > xs->umem->chunk_size_nohr) {
+ len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
xs->rx_dropped++;
return -ENOSPC;
}
@@ -128,7 +139,8 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
addr += xs->umem->headroom;
buffer = xdp_umem_get_data(xs->umem, addr);
- memcpy(buffer, xdp->data, len);
+ memcpy(buffer, xdp->data_meta, len + metalen);
+ addr += metalen;
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (!err) {
xskq_discard_addr(xs->umem->fq);
@@ -343,12 +355,18 @@ static int xsk_release(struct socket *sock)
local_bh_enable();
if (xs->dev) {
+ struct net_device *dev = xs->dev;
+
/* Wait for driver to stop using the xdp socket. */
- synchronize_net();
- dev_put(xs->dev);
+ xdp_del_sk_umem(xs->umem, xs);
xs->dev = NULL;
+ synchronize_net();
+ dev_put(dev);
}
+ xskq_destroy(xs->rx);
+ xskq_destroy(xs->tx);
+
sock_orphan(sk);
sock->sk = NULL;
@@ -407,13 +425,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
qid = sxdp->sxdp_queue_id;
-
- if ((xs->rx && qid >= dev->real_num_rx_queues) ||
- (xs->tx && qid >= dev->real_num_tx_queues)) {
- err = -EINVAL;
- goto out_unlock;
- }
-
flags = sxdp->sxdp_flags;
if (flags & XDP_SHARED_UMEM) {
@@ -458,8 +469,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock;
} else {
/* This xsk has its own umem. */
- xskq_set_umem(xs->umem->fq, &xs->umem->props);
- xskq_set_umem(xs->umem->cq, &xs->umem->props);
+ xskq_set_umem(xs->umem->fq, xs->umem->size,
+ xs->umem->chunk_mask);
+ xskq_set_umem(xs->umem->cq, xs->umem->size,
+ xs->umem->chunk_mask);
err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
if (err)
@@ -469,8 +482,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->dev = dev;
xs->zc = xs->umem->zc;
xs->queue_id = qid;
- xskq_set_umem(xs->rx, &xs->umem->props);
- xskq_set_umem(xs->tx, &xs->umem->props);
+ xskq_set_umem(xs->rx, xs->umem->size, xs->umem->chunk_mask);
+ xskq_set_umem(xs->tx, xs->umem->size, xs->umem->chunk_mask);
xdp_add_sk_umem(xs->umem, xs);
out_unlock:
@@ -707,9 +720,6 @@ static void xsk_destruct(struct sock *sk)
if (!sock_flag(sk, SOCK_DEAD))
return;
- xskq_destroy(xs->rx);
- xskq_destroy(xs->tx);
- xdp_del_sk_umem(xs->umem, xs);
xdp_put_umem(xs->umem);
sk_refcnt_debug_dec(sk);
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index 6c32e92e98fc..b66504592d9b 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -3,16 +3,19 @@
* Copyright(c) 2018 Intel Corporation.
*/
+#include <linux/log2.h>
#include <linux/slab.h>
+#include <linux/overflow.h>
#include "xsk_queue.h"
-void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props)
+void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask)
{
if (!q)
return;
- q->umem_props = *umem_props;
+ q->size = size;
+ q->chunk_mask = chunk_mask;
}
static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
@@ -61,3 +64,56 @@ void xskq_destroy(struct xsk_queue *q)
page_frag_free(q->ring);
kfree(q);
}
+
+struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries) /* allocate a reuse queue; NULL on overflow or allocation failure */
+{
+ struct xdp_umem_fq_reuse *newq;
+
+ /* Check for overflow */
+ if (nentries > (u32)roundup_pow_of_two(nentries))
+ return NULL;
+ nentries = roundup_pow_of_two(nentries); /* capacity becomes the rounded-up power of two */
+
+ newq = kvmalloc(struct_size(newq, handles, nentries), GFP_KERNEL);
+ if (!newq)
+ return NULL;
+ memset(newq, 0, offsetof(typeof(*newq), handles)); /* zero only the part before handles[]; the array itself is left as allocated */
+
+ newq->nentries = nentries;
+ return newq;
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_prepare);
+
+struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, /* returns the queue that did not end up installed, or NULL */
+ struct xdp_umem_fq_reuse *newq)
+{
+ struct xdp_umem_fq_reuse *oldq = umem->fq_reuse;
+
+ if (!oldq) { /* nothing installed yet: take newq as-is */
+ umem->fq_reuse = newq;
+ return NULL;
+ }
+
+ if (newq->nentries < oldq->length) /* newq cannot hold the old entries: keep oldq installed */
+ return newq; /* presumably the caller frees the returned queue - confirm against callers */
+
+ memcpy(newq->handles, oldq->handles, /* carry the old queue's entries over */
+ array_size(oldq->length, sizeof(u64)));
+ newq->length = oldq->length;
+
+ umem->fq_reuse = newq;
+ return oldq;
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_swap);
+
+void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq) /* release a reuse queue */
+{
+ kvfree(rq); /* matches the kvmalloc() in xsk_reuseq_prepare() */
+}
+EXPORT_SYMBOL_GPL(xsk_reuseq_free);
+
+void xsk_reuseq_destroy(struct xdp_umem *umem) /* free the umem's reuse queue and clear the pointer */
+{
+ xsk_reuseq_free(umem->fq_reuse);
+ umem->fq_reuse = NULL; /* leave no dangling pointer behind */
+}
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 8a64b150be54..bcb5cbb40419 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -31,7 +31,8 @@ struct xdp_umem_ring {
};
struct xsk_queue {
- struct xdp_umem_props umem_props;
+ u64 chunk_mask;
+ u64 size;
u32 ring_mask;
u32 nentries;
u32 prod_head;
@@ -78,7 +79,7 @@ static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
{
- if (addr >= q->umem_props.size) {
+ if (addr >= q->size) {
q->invalid_descs++;
return false;
}
@@ -92,7 +93,7 @@ static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
- *addr = READ_ONCE(ring->desc[idx]) & q->umem_props.chunk_mask;
+ *addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
if (xskq_is_valid_addr(q, *addr))
return addr;
@@ -173,8 +174,8 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
if (!xskq_is_valid_addr(q, d->addr))
return false;
- if (((d->addr + d->len) & q->umem_props.chunk_mask) !=
- (d->addr & q->umem_props.chunk_mask)) {
+ if (((d->addr + d->len) & q->chunk_mask) !=
+ (d->addr & q->chunk_mask)) {
q->invalid_descs++;
return false;
}
@@ -253,8 +254,11 @@ static inline bool xskq_empty_desc(struct xsk_queue *q)
return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries;
}
-void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
+void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
void xskq_destroy(struct xsk_queue *q_ops);
+/* Executed by the core when the entire UMEM gets freed */
+void xsk_reuseq_destroy(struct xdp_umem *umem);
+
#endif /* _LINUX_XSK_QUEUE_H */
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 5611b7521020..144c137886b1 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -99,7 +99,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
do {
struct sk_buff *nskb = skb2->next;
- skb2->next = NULL;
+ skb_mark_not_on_list(skb2);
xo = xfrm_offload(skb2);
xo->flags |= XFRM_DEV_RESUME;
@@ -192,9 +192,13 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
+ xso->num_exthdrs = 0;
+ xso->flags = 0;
xso->dev = NULL;
dev_put(dev);
- return err;
+
+ if (err != -EOPNOTSUPP)
+ return err;
}
return 0;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 31acc6f33d98..dc5b20bf29cf 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -469,9 +469,9 @@ static int xfrmi4_err(struct sk_buff *skb, u32 info)
}
if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
- ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+ ipv4_update_pmtu(skb, net, info, 0, protocol);
else
- ipv4_redirect(skb, net, 0, 0, protocol, 0);
+ ipv4_redirect(skb, net, 0, protocol);
xfrm_state_put(x);
return 0;
@@ -742,7 +742,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-struct net *xfrmi_get_link_net(const struct net_device *dev)
+static struct net *xfrmi_get_link_net(const struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 261995d37ced..4ae87c5ce2e3 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -193,7 +193,7 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
struct sk_buff *nskb = segs->next;
int err;
- segs->next = NULL;
+ skb_mark_not_on_list(segs);
err = xfrm_output2(net, sk, segs);
if (unlikely(err)) {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index df7ca2dabc48..ca7a207b81a9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1007,7 +1007,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
int err;
err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy,
- NULL);
+ cb->extack);
if (err < 0)
return err;